This notebook describes the 106,229 hospitalized COVID-19 positive patients that were studied as part of the Consortium for Clinical Characterization of COVID-19 by EHR Neurology group.

library(tidyverse)
library(gridGraphics)
library(gridExtra)
library(ggpubr)
library(DT)
library(kableExtra)
library(cowplot)
library(RColorBrewer)
library(glue)
library(egg)
source("R/plot_theme.R")
source("R/demo_tables.R")
source("R/utils.R")

Import Data from each Healthcare system

# read in files from results folder 
# this folder contains all of the local healthcare system level analyses
rdas <- list.files(
  path = "results",
  pattern = ".rda",
  full.names = TRUE
)


for (rda in rdas) {
  load(rda)
}

rm(rdas, rda)

# create a list of participating healthcare systems from our study tracking spreadsheet
site_google_url <- "https://docs.google.com/spreadsheets/d/1epcYNd_0jCUMktOHf8mz5v651zy1JALD6PgzobrGWDY/edit?usp=sharing"

# load site parameters
site_params <- googlesheets4::read_sheet(site_google_url, sheet = 1)
site_avails <- googlesheets4::read_sheet(site_google_url, sheet = 2)

# filter the list of sites who ran the analysis
sorted_sites <- site_avails %>%
  filter(!is.na(date_v4_received)) %>%
  pull(siteid) %>%
  paste("results", sep = "_")

# list sites without race
sites_wo_race <- site_params %>%
  filter(!include_race) %>%
  pull(siteid)

# combine all rda files with 'results' in name
results <- mget(ls(pattern = "results"))

## load in the pre-processed comorbidity and meta-data
load("processed/comorbidity_table_cnps.rda")
load("processed/neuro_pt_counts.rda") 

Pre-processing

1. Format primary demographic characteristics

# create list of hospitals for adult and pediatric analyses
adult_sites = sorted_sites[!sorted_sites %in% c("BCH_results", "GOSH_results")]

pediatric_sites = sorted_sites[!sorted_sites %in% c("VA1_results", "VA2_results", "VA3_results", "VA4_results", "VA5_results")]

demo_table_adult <- create_demo_tableone(sorted_sites = adult_sites, is_pediatric = FALSE)

demo_table_pediatric <- create_demo_tableone(sorted_sites = pediatric_sites, is_pediatric = TRUE)

demo_table_combine <- rbind(demo_table_adult, demo_table_pediatric)

2. Format clinical characteristics (i.e: continuous variables such as median time to discharge)

clinical_table_adult <- create_clinical_tableone(sorted_sites = adult_sites, is_pediatric = FALSE)

clinical_table_pediatric <- create_clinical_tableone(sorted_sites = pediatric_sites, is_pediatric = TRUE)

clinical_table_combine <- rbind(clinical_table_adult, clinical_table_pediatric)

3. Conduct additional pre-processing of clinical variables

clinical_table_adult_clean <- clean_clinical_tables(clinical_table = clinical_table_adult)

clinical_table_pediatric_clean <- clean_clinical_tables(clinical_table = clinical_table_pediatric)

clinical_table_clean_combine <- rbind(clinical_table_adult_clean, clinical_table_pediatric_clean)

Add Comorbidity data

comorbidity_table_adults <- comorb_table_wide %>%
  filter(population == "Adult") %>% 
  arrange(desc(Comorb_Total)) %>% 
  slice(1:4) # return top comorbidities

comorbidity_table_pediatric <- comorb_table_wide %>%
  filter(population == "Pediatric") %>% 
  arrange(desc(Comorb_Total)) %>% 
  slice(1:4) # return top comorbidities

top_comorb_adults <- comorbidity_table_adults$Comorbidity
top_comorb_pediatrics <- comorbidity_table_pediatric$Comorbidity

Demographics

Table One - Combined

neuro_pt_counts_sum <- colSums(neuro_pt_counts[,-1]) %>% 
  data.frame() %>% 
  t() %>% 
  data.frame()

# specify the order of Table 1
row_order_adult <- c(
  "All Patients", "Female", "Male", "Unknown Sex",
  "0-2", "3-5", "6-11", "12-17", "18-25",
  "26-49", "50-69", "70-79", "80+", "Unknown Age",
  "American Indian", "Asian", "Black",
  "Hawaiian/Pacific Islander",
  "Hispanic/Latino", "White", "Other",
  top_comorb_adults, 
  "Median Elixhauser score  (sd) ",
  "Median pre admission cns  (sd) ",
  "Median pre admission pns  (sd) ",
  "Non-Severe", "Severe",
  "Median time to severe  (sd) ",
  "Alive", "Deceased",
  "Median time to death  (sd) ",
  "Discharged", "Not Discharged",
  "Median time to first discharge  (sd) ",
  "Not Readmitted", "Readmitted",
  "Median time to first readmission  (sd) ",
  "Median number of readmissions  (sd) "
  )


tableone_combine <- create_tableone(demo_table = demo_table_combine,
                                    clinical_table = clinical_table_clean_combine) %>%
  rbind(., comorb_table_wide %>%
          group_by(Comorbidity) %>%
          mutate(Comorb_Total = sum(Comorb_Total, na.rm = TRUE),
          None_sum = sum(None_Total, na.rm = TRUE),
          CNS_sum = sum(CNS_Total, na.rm = TRUE),
          PNS_sum = sum(PNS_Total, na.rm = TRUE),
          None_perc = round(None_sum/neuro_pt_counts_sum$None_n*100,1),
          Central_perc = round(CNS_sum/neuro_pt_counts_sum$CNS_n*100,1),
          Peripheral_perc = round(PNS_sum/neuro_pt_counts_sum$PNS_n*100,1),
          None = paste(None_sum, "(", None_perc, ")"),
          Central = paste(CNS_sum, "(", Central_perc, ")"),
          Peripheral = paste(PNS_sum, "(", Peripheral_perc, ")")) %>%
          distinct(Comorbidity, Comorb_Total, None, Central, Peripheral) %>%
          rename(`Table 1` = "Comorbidity",
          N = "Comorb_Total")) %>%
  slice(match(row_order_adult, `Table 1`))

kbl(tableone_combine %>%
      rename("No Neurological Condition (NNC)" = None)) %>%
      add_header_above(c(
      " ",
      "Total Patients" = 1,
      "Neurological Disease" = 3
      )) %>%
      kable_paper("striped", full_width = F) %>%
      pack_rows("Sex", 2, 4) %>%
      pack_rows("Age", 5, 13) %>%
      pack_rows("Race & Ethnicity", 14, 20) %>%
      pack_rows("Past Medical History", 21, 27) %>%
      pack_rows("Severity", 28, 30) %>%
      pack_rows("Survival", 31, 33) %>%
      pack_rows("Discharge", 34, 36) %>%
      pack_rows("Readmission", 37, 40)
Total Patients
Neurological Disease
Table 1 N No Neurological Condition (NNC) Central Peripheral
All Patients 106229 88340 (100%) 15101 (100%) 2788 (100%)
Sex
Female 37590 32399 (36.7%) 4247 (28.1%) 944 (33.9%)
Male 68638 55940 (63.3%) 10854 (71.9%) 1844 (66.1%)
Unknown Sex 1 1 (0%) 0 (0%) 0 (0%)
Age
0-2 769 747 (0.8%) 22 (0.1%) 0 (0%)
3-5 275 251 (0.3%) 24 (0.2%) 0 (0%)
6-11 403 368 (0.4%) 33 (0.2%) 2 (0.1%)
12-17 707 637 (0.7%) 60 (0.4%) 10 (0.4%)
18-25 2328 2138 (2.4%) 145 (1%) 45 (1.6%)
26-49 17799 16122 (18.3%) 1166 (7.7%) 511 (18.5%)
50-69 36885 31556 (35.7%) 4169 (27.7%) 1160 (41.9%)
70-79 24860 19672 (22.3%) 4508 (30%) 680 (24.6%)
80+ 22094 16813 (19%) 4920 (32.7%) 361 (13%)
Race & Ethnicity
American Indian 350 295 (0.3%) 55 (0.4%) 0 (0%)
Asian 1694 1464 (1.7%) 205 (1.4%) 25 (0.9%)
Black 16815 13368 (15.1%) 2995 (19.9%) 452 (16.5%)
Hawaiian/Pacific Islander 297 255 (0.3%) 41 (0.3%) 1 (0%)
Hispanic/Latino 870 819 (0.9%) 29 (0.2%) 22 (0.8%)
White 41871 33360 (37.8%) 7401 (49.1%) 1110 (40.5%)
Other 44220 38750 (43.9%) 4336 (28.8%) 1134 (41.3%)
Past Medical History
Hypertension 35587 27323 ( 30.9 ) 7277 ( 48.2 ) 987 ( 35.4 )
Alcohol abuse 30130 23065 ( 26.1 ) 6194 ( 41 ) 871 ( 31.2 )
Drug abuse 26596 20287 ( 23 ) 5528 ( 36.6 ) 781 ( 28 )
Diabetes 21969 16905 ( 19.1 ) 4429 ( 29.3 ) 635 ( 22.8 )
Median Elixhauser score (sd) 0.3 (0.7) 0.2 (0.5) 1.4 (2) 0.6 (1.7)
Median pre admission cns (sd) 0 (0) 0 (0) 0 (0) 0 (0)
Median pre admission pns (sd) 0 (0) 0 (0) 0 (0) 0 (0.1)
Severity
Non-Severe 66252 57771 (65.4%) 7007 (46.4%) 1474 (52.7%)
Severe 39985 30576 (34.6%) 8084 (53.6%) 1325 (47.3%)
Median time to severe (sd) 0.6 (1) 0.8 (1.3) 0.4 (0.7) 0.8 (1.1)
Survival
Alive 87376 74389 (84.2%) 10392 (68.8%) 2595 (93.1%)
Deceased 18849 13953 (15.8%) 4705 (31.2%) 191 (6.9%)
Median time to death (sd) 15.3 (6.4) 15.9 (5.8) 22.4 (32.5) 36.9 (41.4)
Discharge
Discharged 103165 85639 (96.9%) 14801 (98%) 2725 (97.7%)
Not Discharged 3063 2701 (3.1%) 297 (2%) 65 (2.3%)
Median time to first discharge (sd) 8.4 (4.2) 6.5 (3.6) 12.8 (6.1) 14.5 (18.3)
Readmission
Not Readmitted 91646 76877 (87%) 12435 (82.4%) 2334 (83.4%)
Readmitted 14569 11456 (13%) 2650 (17.6%) 463 (16.6%)
Median time to first readmission (sd) 28.1 (20.3) 26.4 (17.6) 31.9 (33.7) 38.1 (28.4)
Median number of readmissions (sd) 0.1 (0.3) 0 (0) 0.1 (0.3) 0.1 (0.3)
write.csv(tableone_combine, 'tables/table1.csv', row.names = FALSE)

Table One - Adults

tableone_adult <- create_tableone(demo_table = demo_table_adult,
                                  clinical_table = clinical_table_adult_clean) %>%
                                  rbind(., comorbidity_table_adults %>%
                                          select(Comorbidity, Comorb_Total, `NNC_N_%`, `CNS_N_%`, `PNS_N_%`) %>%
                                          rename(`Table 1` = "Comorbidity",
                                                 N = "Comorb_Total",
                                                 None = "NNC_N_%",
                                                 Central = "CNS_N_%",
                                                 Peripheral = "PNS_N_%")) %>% 
  slice(match(row_order_adult, `Table 1`))

kbl(tableone_adult %>%
    rename("No Neurological Condition (NNC)" = None)) %>%
  add_header_above(c(
  " ",
  "Total Patients" = 1,
  "Neurological Disease" = 3
  )) %>%
  kable_paper("striped", full_width = F) %>%
  pack_rows("Sex", 2, 4) %>%
  pack_rows("Age", 5, 9) %>%
  pack_rows("Race & Ethnicity", 10, 16) %>%
  pack_rows("Past Medical History", 17, 23) %>%
  pack_rows("Severity", 24, 26) %>%
  pack_rows("Survival", 27, 29) %>%
  pack_rows("Discharge", 30, 32) %>%
  pack_rows("Readmission", 33, 36)
Total Patients
Neurological Disease
Table 1 N No Neurological Condition (NNC) Central Peripheral
All Patients 104031 86321 (100%) 14938 (100%) 2772 (100%)
Sex
Female 36564 31445 (36.4%) 4185 (28%) 934 (33.7%)
Male 67466 54875 (63.6%) 10753 (72%) 1838 (66.3%)
Unknown Sex 1 1 (0%) 0 (0%) 0 (0%)
Age
18-25 2328 2138 (2.5%) 145 (1%) 45 (1.6%)
26-49 17799 16122 (18.7%) 1166 (7.8%) 511 (18.5%)
50-69 36885 31556 (36.6%) 4169 (28%) 1160 (42.1%)
70-79 24860 19672 (22.8%) 4508 (30.2%) 680 (24.7%)
80+ 22094 16813 (19.5%) 4920 (33%) 361 (13.1%)
Race & Ethnicity
American Indian 349 294 (0.3%) 55 (0.4%) 0 (0%)
Asian 1633 1412 (1.6%) 196 (1.3%) 25 (0.9%)
Black 16631 13202 (15.3%) 2979 (20%) 450 (16.5%)
Hawaiian/Pacific Islander 296 254 (0.3%) 41 (0.3%) 1 (0%)
Hispanic/Latino 850 799 (0.9%) 29 (0.2%) 22 (0.8%)
White 41206 32752 (38%) 7354 (49.3%) 1100 (40.3%)
Other 42971 37587 (43.6%) 4254 (28.5%) 1130 (41.4%)
Past Medical History
Hypertension 35566 27302 ( 31.6 %) 7277 ( 48.7 %) 987 ( 35.6 %)
Alcohol abuse 29918 22879 ( 26.5 %) 6169 ( 41.3 %) 870 ( 31.4 %)
Drug abuse 26389 20097 ( 23.3 %) 5512 ( 36.9 %) 780 ( 28.1 %)
Diabetes 21952 16888 ( 19.6 %) 4429 ( 29.6 %) 635 ( 22.9 %)
Median Elixhauser score (sd) 0.3 (0.7) 0.2 (0.5) 1.4 (2) 0.6 (1.7)
Median pre admission cns (sd) 0 (0) 0 (0) 0 (0) 0 (0)
Median pre admission pns (sd) 0 (0) 0 (0) 0 (0) 0 (0.1)
Severity
Non-Severe 64578 56181 (65.1%) 6935 (46.4%) 1462 (52.6%)
Severe 39468 30140 (34.9%) 8008 (53.6%) 1320 (47.4%)
Median time to severe (sd) 0.6 (1) 0.8 (1.3) 0.4 (0.7) 0.8 (1.1)
Survival
Alive 85212 72388 (83.9%) 10247 (68.6%) 2577 (93.1%)
Deceased 18820 13933 (16.1%) 4696 (31.4%) 191 (6.9%)
Median time to death (sd) 15.3 (6.4) 15.9 (5.8) 22.4 (32.5) 36.9 (41.4)
Discharge
Discharged 101032 83675 (96.9%) 14650 (98%) 2707 (97.7%)
Not Discharged 3005 2645 (3.1%) 295 (2%) 65 (2.3%)
Median time to first discharge (sd) 8.4 (4.2) 6.5 (3.6) 12.8 (6.1) 14.5 (18.3)
Readmission
Not Readmitted 89802 75170 (87.1%) 12312 (82.4%) 2320 (83.5%)
Readmitted 14237 11151 (12.9%) 2626 (17.6%) 460 (16.5%)
Median time to first readmission (sd) 28.1 (20.3) 26.4 (17.6) 31.9 (33.7) 38.1 (28.4)
Median number of readmissions (sd) 0.1 (0.3) 0 (0) 0.1 (0.3) 0.1 (0.3)

Table One - Pediatrics

# specify the order of Table 1
row_order_pediatric <- c(
  "All Patients", "Female", "Male", "Unknown Sex",
  "0-2", "3-5", "6-11", "12-17", "18-25",
  "26-49", "50-69", "70-79", "80+", "Unknown Age",
  "American Indian", "Asian", "Black",
  "Hawaiian/Pacific Islander",
  "Hispanic/Latino", "White", "Other",
  top_comorb_pediatrics, 
  "Median Elixhauser score  (sd) ",
  "Median pre admission cns  (sd) ",
  "Median pre admission pns  (sd) ",
  "Non-Severe", "Severe",
  "Median time to severe  (sd) ",
  "Alive", "Deceased",
  "Median time to death  (sd) ",
  "Discharged", "Not Discharged",
  "Median time to first discharge  (sd) ",
  "Not Readmitted", "Readmitted",
  "Median time to first readmission  (sd) ",
  "Median number of readmissions  (sd) "
  )

tableone_pediatric <- create_tableone(demo_table = demo_table_pediatric,
                                      clinical_table = clinical_table_pediatric_clean)  %>%
  rbind(., comorbidity_table_pediatric %>%
          select(Comorbidity, Comorb_Total, `NNC_N_%`, `CNS_N_%`, `PNS_N_%`) %>% 
          rename(`Table 1` = "Comorbidity",
                  N = "Comorb_Total",
                  None = "NNC_N_%",
                  Central = "CNS_N_%",
                  Peripheral = "PNS_N_%")) %>% 
  slice(match(row_order_pediatric, `Table 1`))

kbl(tableone_pediatric %>%
      rename("No Neurological Condition (NNC)" = None)) %>%
  add_header_above(c(
  " ",
  "Total Patients" = 1,
  "Neurological Disease" = 3
  )) %>%
  kable_paper("striped", full_width = F) %>%
  pack_rows("Sex", 2, 4) %>%
  pack_rows("Age", 5, 8) %>%
  pack_rows("Race & Ethnicity", 9, 15) %>%
  pack_rows("Past Medical History", 16, 22) %>%
  pack_rows("Severity", 23, 25) %>%
  pack_rows("Survival", 26, 28) %>%
  pack_rows("Discharge", 29, 31) %>%
  pack_rows("Readmission", 32, 35)
Total Patients
Neurological Disease
Table 1 N No Neurological Condition (NNC) Central Peripheral
All Patients 2198 2019 (100%) 163 (100%) 16 (100%)
Sex
Female 1026 954 (47.3%) 62 (38%) 10 (62.5%)
Male 1172 1065 (52.7%) 101 (62%) 6 (37.5%)
Unknown Sex 0 0 (0%) 0 (0%) 0 (0%)
Age
0-2 769 747 (37.3%) 22 (15.8%) 0 (0%)
3-5 275 251 (12.5%) 24 (17.3%) 0 (0%)
6-11 403 368 (18.4%) 33 (23.7%) 2 (16.7%)
12-17 707 637 (31.8%) 60 (43.2%) 10 (83.3%)
Race & Ethnicity
American Indian 1 1 (0%) 0 (0%) 0 (0%)
Asian 61 52 (2.6%) 9 (5.8%) 0 (0%)
Black 184 166 (8.3%) 16 (10.4%) 2 (12.5%)
Hawaiian/Pacific Islander 1 1 (0%) 0 (0%) 0 (0%)
Hispanic/Latino 20 20 (1%) 0 (0%) 0 (0%)
White 665 608 (30.2%) 47 (30.5%) 10 (62.5%)
Other 1249 1163 (57.8%) 82 (53.2%) 4 (25%)
Past Medical History
Alcohol abuse 212 186 ( 9.2 %) 25 ( 15.3 %) 1 ( 6.2 %)
Drug abuse 207 190 ( 9.4 %) 16 ( 9.8 %) 1 ( 6.2 %)
Weight loss 123 109 ( 5.4 %) 13 ( 8 %) 1 ( 6.2 %)
Cardiac arrhythmias 109 94 ( 4.7 %) 12 ( 7.4 %) 3 ( 18.8 %)
Median Elixhauser score (sd) 2.2 (4.5) 0 (0) 4.4 (9.2) 2.5 (3.6)
Median pre admission cns (sd) 0.2 (0.7) 0.1 (0.2) 1.9 (3.9) 0 (0)
Median pre admission pns (sd) 0 (0) 0 (0) 0 (0) 0.2 (0.6)
Severity
Non-Severe 1674 1590 (78.5%) 72 (48.6%) 12 (70.6%)
Severe 517 436 (21.5%) 76 (51.4%) 5 (29.4%)
Median time to severe (sd) 2.4 (5.3) 3.8 (9.5) 10.8 (33.1) 0.9 (2.5)
Survival
Alive 2164 2001 (99%) 145 (94.2%) 18 (100%)
Deceased 29 20 (1%) 9 (5.8%) 0 (0%)
Median time to death (sd) 12.6 (36.3) 53.5 (76.3) 5.8 (10.2) 95 (268.7)
Discharge
Discharged 2133 1964 (97.2%) 151 (98.7%) 18 (100%)
Not Discharged 58 56 (2.8%) 2 (1.3%) 0 (0%)
Median time to first discharge (sd) 5.1 (4.2) 3.8 (4.2) 5.8 (2.7) 5.9 (4.5)
Readmission
Not Readmitted 1844 1707 (84.8%) 123 (83.7%) 14 (82.4%)
Readmitted 332 305 (15.2%) 24 (16.3%) 3 (17.6%)
Median time to first readmission (sd) 20 (20.2) 20.1 (19) 75.3 (86) 14.4 (22.8)
Median number of readmissions (sd) 0.1 (0.3) 0 (0) 0.2 (0.4) 0.4 (0.5)

Sample Size

n_adult <- as.numeric(tableone_adult$N)[1]
n_pediatric <- as.numeric(tableone_pediatric$N)[1]

Demographic & Clinical Course Analysis

Evaluate characteristics among neuro status groups

1. Conduct chi-squared tests for categorical demographic variables

compute_chi_square <- function(tableone, demo_table) {
  
  N = tableone %>% select(N) %>% head(1) %>% as.numeric()

  # sum of all neuro condition groups
  cns_n = as.numeric(gsub( " .*$", "", tableone[1,"Central"]))
  pns_n = as.numeric(gsub( " .*$", "", tableone[1,"Peripheral"]))
  none_n = as.numeric(gsub( " .*$", "", tableone[1,"None"]))
  
  # sum up the total counts of each demographic variable across healthcare systems
  tableOne_sums <- demo_table %>%
    group_by(variable, Demo_var, Demo_var_i) %>%
    summarise(across(
      starts_with("n_var"),
      function(x) sum(x, na.rm = TRUE)
    ), .groups = "drop")
  
  # create list of demographic/clinical variables
  vars = tableOne_sums$variable
  
  # for binary variables, we should group by category 
  # Thus, only run the analysis for sex.male rather than both sex.female AND sex.male which would be redundant
  # we will also remove `age_group.unknown` since their is only 1 patient here
  exclude <- c("readmitted.false", "sex.female", "sex.other", "survival.deceased", "severity.non-severe",
               "covid_discharged.discharged", "age_group.Unknown")
  vars <- vars[!vars %in% exclude]
  
  # create empty list for chi.square test results
  chi_result_list = list()
  
  # for each variable, we will run the chi.square test
  for(i in vars) {
    test = calc_chisq(i, tableOne_sums, none_n, pns_n, cns_n)
    X2 <- round(test$statistic, 4)
    p_value <- test$p.value
    df <- test$parameter
    Variable = paste(i)
    
    chi_results <- cbind(Variable, X2, p_value, df) %>% 
      data.frame() %>% 
      mutate(p_value = as.numeric(p_value))
    
    rownames(chi_results) <- NULL
    
    chi_result_list[[i]] <- chi_results
  }
  
  # save list of chi.square test results
  chisq_results <- bind_rows(chi_result_list)
  
  return(chisq_results)
  
  
  }

Combined Adult & Pediatric

chisq_combine <- compute_chi_square(tableone = tableone_combine %>%
                                      filter(!`Table 1` == 'Discharged',
                                             !`Table 1` == 'Not Discharged'),
                                    demo_table = demo_table_combine %>%
                                      filter(!variable == 'covid_discharged.discharged',
                                             !variable == 'covid_discharged.not discharged'))

datatable(chisq_combine %>% arrange(p_value))

Adult

chisq_adult <- compute_chi_square(tableone = tableone_adult %>%
                                      filter(!`Table 1` == 'Discharged',
                                             !`Table 1` == 'Not Discharged'),
                                    demo_table = demo_table_adult %>%
                                      filter(!variable == 'covid_discharged.discharged',
                                             !variable == 'covid_discharged.not discharged'))

datatable(chisq_adult %>% arrange(p_value))

Pediatric

chisq_pediatric <- compute_chi_square(tableone = tableone_pediatric %>%
                                      filter(!`Table 1` == 'Discharged',
                                             !`Table 1` == 'Not Discharged'),
                                    demo_table = demo_table_pediatric %>%
                                      filter(!variable == 'covid_discharged.discharged',
                                             !variable == 'covid_discharged.not discharged'))
## Warning in chisq.test(unlist(mat)): Chi-squared approximation may be incorrect

## Warning in chisq.test(unlist(mat)): Chi-squared approximation may be incorrect

## Warning in chisq.test(unlist(mat)): Chi-squared approximation may be incorrect

## Warning in chisq.test(unlist(mat)): Chi-squared approximation may be incorrect

## Warning in chisq.test(unlist(mat)): Chi-squared approximation may be incorrect

## Warning in chisq.test(unlist(mat)): Chi-squared approximation may be incorrect

## Warning in chisq.test(unlist(mat)): Chi-squared approximation may be incorrect

## Warning in chisq.test(unlist(mat)): Chi-squared approximation may be incorrect

## Warning in chisq.test(unlist(mat)): Chi-squared approximation may be incorrect

## Warning in chisq.test(unlist(mat)): Chi-squared approximation may be incorrect

## Warning in chisq.test(unlist(mat)): Chi-squared approximation may be incorrect
datatable(chisq_pediatric %>% arrange(p_value))

2. Conduct anova (Kruskal-Wallis) tests for continuous clinical variables

compute_kruskal <- function(clinical_table) {

  # create empty list to save processed variables
  processed_list_figs <- list()
  
  # create a list of clinical variables to further pre-process
  vars_to_process <- unique(clinical_table$name)
  
  # for each variable, we will remove the [min, max] as before
  for (i in vars_to_process) {
    mod_table <- clinical_table %>%
      rename("var" = name) %>%
      filter(var == i) %>%
      mutate(
        site = toupper(site),
        None = sub("(\\(.*|\\[.*)", "", None),
        Peripheral = sub("(\\(.*|\\[.*)", "", Peripheral),
        Central = sub("(\\(.*|\\[.*)", "", Central)
      ) %>%
      mutate(across(None:Central, as.numeric))
  
    processed_list_figs[[i]] <- mod_table
  }
  
  clinical_table_anova <- bind_rows(processed_list_figs)
  
  # format the continuous variables
  cont_vars = clinical_table_anova %>%
    mutate(
      # create a variable without mean/median prefix
      grouped_var = gsub("Mean |Median | \\[Min, Max\\]| \\(SD\\)", "", var)
    ) %>%
    pivot_longer(
      cols = None:Central,
      names_to = "type"
    ) %>%
    mutate(type = factor(type, levels = c("None", "Central", "Peripheral"))) %>% 
    # select variables to analyze
    filter(grouped_var %in% c("Elixhauser score",
                              "pre admission cns", 
                              "pre admission pns", 
                              "time to death", 
                              "time to first discharge", 
                              "time to severe",
                              "number of readmissions",
                              "time to first readmission")) %>% 
    distinct()
  
  # create function to conduct anova
  cont_var_results <- function(variable) {
    
    df <- cont_vars %>% 
    filter(grouped_var == paste(variable))
    
    # kruskal.test calculates the kruskal-Wallis H-statistic 
    # does not assume normality between groups
    # also called the one-way ANOVA on ranks
    test = kruskal.test(df$value ~ df$type)
    
  }
  
  # create empty list for Kruskal-Wallis results
  cont_vars_list = list()
  
  # create list of unique variables
  outcome_cont_vars = unique(cont_vars$grouped_var)
  
  for(i in outcome_cont_vars) {
    
    test = cont_var_results(i)
    X2 <- round(test$statistic, 4)
    p_value <- test$p.value
    df <- test$parameter
    Variable = paste(i)
    
    kw <- cbind(Variable, X2, p_value, df) %>% 
      data.frame() 
    
    rownames(kw) <- NULL
    
    cont_vars_list[[i]] <- kw
  
    
  }
  
  anova_results <- bind_rows(cont_vars_list)
  
  }

Adult & Pediatric Kruskall-Wallis

kw_combine <- compute_kruskal(clinical_table = clinical_table_combine)
datatable(kw_combine %>% arrange(p_value))

Adult Clinical Kruskall-Wallis

Note: median CNS is 0 for all sites (reason for the NA values)

kw_adult <- compute_kruskal(clinical_table = clinical_table_adult)
datatable(kw_adult %>% arrange(p_value))

Pediatric Kruskall-Wallis

kw_pediatric <- compute_kruskal(clinical_table = clinical_table_pediatric)
datatable(kw_pediatric %>% arrange(p_value))

3. Combine chi-square and Kruskal-Wallis results

We will evaluate significance when controlling for False Discovery Rate (FDR)

tableone_stats <- function(chisq_results, anova_results, population) {

  demo_stats <- rbind(chisq_results %>% select(Variable, p_value),
                      anova_results %>% select(Variable, p_value))
  
  # adjust p-values with FDR
  demo_stats$adj_p_value <- p.adjust(demo_stats$p_value, method = "fdr")
  
  # round & format p-value 
  demo_stats <- demo_stats %>% 
    mutate(p_value = as.numeric(p_value),
           pvalue = if_else(p_value < 0.001, "< 0.001", paste(round(p_value, 3))),
           adj.p_value = if_else(adj_p_value < 0.001, "< 0.001", paste(round(adj_p_value, 3))))
  
  # tidy up the demographics stat table
  demo_stats_tidy <- demo_stats %>% 
    rename(`Unadjusted P-value (raw)` = p_value,
           `FDR Adjusted P-value (raw)` = adj_p_value,
           `Unadjusted P-value (tidy)` = pvalue,
           `FDR Adjusted P-value (tidy)` = adj.p_value) %>% 
    select(Variable, `Unadjusted P-value (raw)`, `FDR Adjusted P-value (raw)`, `Unadjusted P-value (tidy)`, `FDR Adjusted P-value (tidy)`)

  write.csv(demo_stats_tidy, paste0("tables/Table1_pvals_", population, ".csv"), row.names = FALSE)
  
  return(demo_stats_tidy)
  
}

Incorporate Comorbidities

# define matrix for each comorbidity
comorb_matrix <- function(comorbidity_table, comorbidity, population) {

    if(population=='Combined') {
      comorbidity_table = comorbidity_table
    } else if (population=="Adult") {
      comorbidity_table = comorbidity_table %>% 
        filter(population == "Adult")
    } else if (population=="Pediatric") {
      comorbidity_table = comorbidity_table %>% 
        filter(population == "Pediatric")
    } else {
      print('incorrect population specification')
    }
  
  mat_comorb <- comorbidity_table %>%
    filter(Comorbidity == paste(comorbidity)) %>%
    group_by(Comorbidity) %>% 
    mutate(None_comorb = sum(None_Total, na.rm = TRUE),
    CNS_comorb = sum(CNS_Total, na.rm = TRUE),
    PNS_comorb = sum(PNS_Total, na.rm = TRUE)) %>%
    ungroup() %>%
    distinct(None_comorb, CNS_comorb, PNS_comorb) %>%
    mutate(none_dif = neuro_pt_counts_sum$None_n - None_comorb,
    pns_dif = neuro_pt_counts_sum$PNS_n - PNS_comorb,
    cns_dif = neuro_pt_counts_sum$CNS_n - CNS_comorb) %>%
    data.frame() %>%
    #as.integer() %>%
    matrix(nrow = 2, ncol = 3, byrow = TRUE)

  comorb_results <- chisq.test(unlist(mat_comorb))
  
  results_df <- data.frame(Variable = paste(comorbidity),
                                X2 = round(comorb_results$statistic, 4),
                                p_value = comorb_results$p.value,
                                df = comorb_results$parameter)
  row.names(results_df) <- NULL
  
  return(results_df)
}

## combined comorbidities
comorbidity_combined_chi_list <- list()

for(i in top_comorb_adults) {
  
  comorb_chi_results <- comorb_matrix(comorbidity_table = comorb_table_wide, i, population = "Combined")
  comorbidity_combined_chi_list[[i]] <- comorb_chi_results
}

comorbidity_combined_chi <- comorbidity_combined_chi_list %>%
  bind_rows() 

chisq_combine_comorb <- chisq_combine %>%
  mutate(X2 = as.numeric(X2),
  df = as.numeric(df)) %>%
  bind_rows(comorbidity_combined_chi) 



## adult comorbidities
comorbidity_adult_chi_list <- list()

for(i in top_comorb_adults) {
  
  comorb_chi_results <- comorb_matrix(comorbidity_table = comorb_table_wide, i, population = "Adult")
  comorbidity_adult_chi_list[[i]] <- comorb_chi_results
}

comorbidity_adult_chi <- comorbidity_adult_chi_list %>%
  bind_rows() 

chisq_adult_comorb <- chisq_adult %>%
  mutate(X2 = as.numeric(X2),
  df = as.numeric(df)) %>%
  bind_rows(comorbidity_adult_chi) 

## pediatric comorbidities
comorbidity_pediatric_chi_list <- list()

for(i in top_comorb_pediatrics) {
  
  comorb_chi_results <- comorb_matrix(comorbidity_table = comorb_table_wide, i, population = "Pediatric")
  comorbidity_pediatric_chi_list[[i]] <- comorb_chi_results
}

comorbidity_pediatric_chi <- comorbidity_pediatric_chi_list %>%
  bind_rows() 

chisq_pediatric_comorb <- chisq_pediatric %>%
  mutate(X2 = as.numeric(X2),
  df = as.numeric(df)) %>%
  bind_rows(comorbidity_pediatric_chi_list) 

Table One - Combined Analysis

combine_tableone_stats <- tableone_stats(chisq_results = chisq_combine_comorb,
                                         anova_results = kw_combine,
                                         population = "combined") 
  

datatable(combine_tableone_stats)

Table One - Adult Analysis

adult_tableone_stats <- tableone_stats(chisq_results = chisq_adult_comorb,
                                         anova_results = kw_adult,
                                         population = "adult") 
  

datatable(adult_tableone_stats)

Table One - Pediatric Analysis

pediatric_tableone_stats <- tableone_stats(chisq_results = chisq_pediatric_comorb,
                                         anova_results = kw_pediatric,
                                         population = "pediatric") 
  

datatable(pediatric_tableone_stats)

Neurological Diagnosis Analysis

Evaluate CNS and PNS neurological diagnoses in our patient cohort

Format Diagnostic code data

# import list of ICD-9/ICD-10 codes
neuro_icds_10 <-
  readxl::read_excel("public-data/2020-02-22_neuro-icd10_CNSvPNS.xlsx", sheet = 2) %>% 
  rename(
    "icd" = `ICD-10`,
    "pns_cns" = `Nervous system Involvement (1=central, 2=peripheral, 3=irrelevant)`,
    "type" = `ICD-10_type (1=first three alphanumeric code only, 2=digits after decimal point)`) %>%
  filter(type == 1) %>% 
  mutate(pns_cns = as.factor(pns_cns) %>% fct_recode(
    Central = "1",
    Peripheral = "2"
  )) %>%
  distinct(icd, `Neurological Disease Category`, pns_cns, `ICD-10 Description Revised`) %>%
  rename("description" = `ICD-10 Description Revised`) %>%
  mutate(concept_type = "icd-10")

neuro_icds_9 <- read.csv("public-data/icd9_tab_CNSvPNS.csv") %>%
  rename(
    "Neurological Disease Category" = "Neurological.Disease.Category",
    "pns_cns" = `Nervous.system.Involvement..1.central..2.peripheral.`,
    "icd_description" = `icd9_desc_revised`
  ) %>%
  mutate(
    pns_cns = as.factor(pns_cns) %>% fct_recode(
      Central = "1",
      Peripheral = "2"
    ),
    concept_type = "DIAG-ICD9"
  ) %>%
  select(icd, `Neurological Disease Category`, pns_cns, icd_description) %>%
  rename("description" = icd_description) %>%
  mutate(concept_type = "icd-9")

# bind list of ICD-9/ICD-10 codes
icds <- rbind(neuro_icds_10, neuro_icds_9)

Calculate total number of neuro diagnoses for each adults & pediatrics

compute_neuro_counts <- function(sorted_sites, sample_size, is_pediatric = FALSE) {
  
    # create empty list to save processed diagnostic data
    diag_table_list <- list()
  
    if(is_pediatric==FALSE) {
      population = "adults"
      } else {
      population = "pediatrics"
      }
  
  # for each healthcare system, we will calculate the total number of patients with each ICD code
  for (i in sorted_sites) {
    tmp <- get(i)
    tmp_diag <- tmp[[c(
      "first_hosp_results",
      "icd_tables",
      paste0("icd_tables_", population),
      "demo_table"
    )]]
    tmp_diag$site <- tmp[["site"]]
    diag_table_list[[i]] <- tmp_diag %>%
      filter(variable == "sex.female" | variable == "sex.male" | variable == "sex.other") %>%
      select(site, contains("n_var"))
  }
  
  diag_table_wide <- bind_rows(diag_table_list)
  
  # we will count the total number and percent across healthcare systems
  diag_counts <- colSums(Filter(is.numeric, diag_table_wide), na.rm = TRUE) %>% 
    data.frame() %>% 
    mutate(perc = round(./sample_size * 100, 1))
  
  # format diagnostic table
  diag_counts <- tibble::rownames_to_column(diag_counts, "icd") %>%
    mutate(icd = gsub("n_var_", "", icd)) %>%
    rename(`Number of Patients` = ".") %>%
    full_join(., icds, by = "icd") %>%
    dplyr::arrange(desc(`Number of Patients`)) %>% 
    mutate(`Number of Patients (% of Cohort)` = paste(`Number of Patients`, "(", perc, ")", sep = " ")) %>% 
    select(pns_cns, `Neurological Disease Category`, concept_type, 
           icd, description, `Number of Patients (% of Cohort)`) %>% 
      rename(Code = "icd",
           `Nervous System` = "pns_cns",
           Description = "description",
           `ICD Version` = "concept_type") %>% 
    mutate(`Number of Patients (% of Cohort)` = if_else(`Number of Patients (% of Cohort)` == "NA ( NA )", "0 ( 0 )", `Number of Patients (% of Cohort)`),
           Code = if_else(Code == "NN", "No Neurological Disease (NNC)", Code))
  
  return(diag_counts)
  
  }
neuro_counts_adult <- compute_neuro_counts(sorted_sites = adult_sites,
                                           sample_size = n_adult, 
                                           is_pediatric = FALSE)

neuro_counts_pediatric <- compute_neuro_counts(sorted_sites = pediatric_sites,
                                           sample_size = n_pediatric, 
                                           is_pediatric = TRUE)

neuro_counts_combined <- neuro_counts_adult %>% 
  rename(`Number of Adult Patients (% of Cohort)` = `Number of Patients (% of Cohort)`) %>% 
  left_join(., neuro_counts_pediatric %>% 
              select(`ICD Version`, Code, `Number of Patients (% of Cohort)`) %>% 
              rename("Number of Pediatric Patients (% of Cohort`)" = `Number of Patients (% of Cohort)`))
## Joining, by = c("ICD Version", "Code")
write.csv(neuro_counts_combined, paste0("tables/Supp.Table2_diagnoses", ".csv"), row.names = FALSE)
datatable(neuro_counts_adult, caption = "Number (%) of Adult Patients with each Neurological Diagnosis") 
datatable(neuro_counts_pediatric, caption = "Number (%) of Pediatric Patients with each Neurological Diagnosis") 

Evaluate Diagnoses by Age & Outcome

compute_stratified_neuro_counts <- function(tableone, sorted_sites, is_pediatric = FALSE) {
  
  # create empty list to save processed diagnostic data
    diag_table_list <- list()
  
    if(is_pediatric==FALSE) {
      population = "adults"
      } else {
      population = "pediatrics"
      }
  
  # for each healthcare system, we will calculate the total number of patients with each ICD code
  for (i in sorted_sites) {
    #print(i)
    tmp <- get(i)
    tmp_diag <- tmp[[c(
      "first_hosp_results",
      "icd_tables",
      paste0("icd_tables_", population),
      "demo_table"
    )]]
    tmp_diag$site <- tmp[["site"]]
    try(
    diag_table_list[[i]] <- tmp_diag %>%
      select(site, variable, starts_with("n_var")) %>% 
      pivot_longer(cols = starts_with("n_var"), names_to = "code", values_to = "freq") %>% 
      ungroup()
    )
  }

  diag_table_wide_all <- bind_rows(diag_table_list) %>% 
      group_by(variable, code) %>% 
    # total number of patients who had each code for each variable across sites
      mutate(total_n_var  = sum(freq, na.rm = TRUE)) %>% 
    distinct(variable, code, total_n_var) %>% 
    mutate(code = gsub("n_var_", "", code),
           variable = gsub("Severity.", "", variable),
           variable = gsub("Survival.", "", variable),
           variable = gsub("readmitted", "", variable),
           variable = gsub("age_group", "", variable),
           variable = gsub("covid_discharged", "", variable),
           variable = if_else(variable == ".TRUE", "Readmitted", variable)) %>% 
    left_join(., icds %>% rename(code = "icd"), by = "code") %>% 
    ungroup() %>% 
    filter(!concept_type == "icd-9")


  # refactor neuro status
  diag_table_wide_all$pns_cns <- factor(diag_table_wide_all$pns_cns,
                                        levels=c("Central", "Peripheral"),
                                        labels=c("CNS", "PNS"))
  
  
  return(diag_table_wide_all)
  
  }
stratified_neuro_counts_adult <- compute_stratified_neuro_counts(sorted_sites = adult_sites,
                                                                 is_pediatric = FALSE,
                                                                 tableone = tableone_adult)

stratified_neuro_counts_pediatric <- compute_stratified_neuro_counts(sorted_sites = pediatric_sites,
                                                                 is_pediatric = TRUE,
                                                                 tableone = tableone_pediatric)
compute_stratified_neuro_counts_age <- function(stratified_neuro_counts, tableone) {
  
  stratified_neuro_counts$variable <- factor(stratified_neuro_counts$variable, 
                                     levels=c(".00to02", ".03to05", ".06to11", ".12to17",
                                              ".18to25", ".26to49", ".50to69", ".70to79", ".80plus"), 
                                     labels=c("0-2 Years", "3-5 Years", "6-11 Years", "12-17 Years",
                                              "18-25 Years", "26-49 Years", "50-69 Years", "70-79 Years", "80+ Years"))
  
  # total counts
  n_0_2 <- get_table_n(tableone, var = "0-2")
  n_3_5 = get_table_n(tableone, var = "3-5")
  n_6_11 = get_table_n(tableone, var = "6-11")
  n_12_17 = get_table_n(tableone, var = "12-17")
  n_18_25 = get_table_n(tableone, var = "18-25")
  n_26_49 = get_table_n(tableone, var = "26-49")
  n_50_69 = get_table_n(tableone, var = "50-69")
  n_70_79 = get_table_n(tableone, var = "70-79")
  n_80 = get_table_n(tableone, var = "80+")
  
  age_df <- data.frame("variable" = c("0-2 Years",
                                    "3-5 Years", 
                                    "6-11 Years", 
                                    "12-17 Years",
                                    "18-25 Years", 
                                    "26-49 Years", 
                                    "50-69 Years", 
                                    "70-79 Years", 
                                    "80+ Years"),
                     "n_var" = c(n_0_2,
                               n_3_5,
                               n_6_11,
                               n_12_17,
                               n_18_25,
                               n_26_49,
                               n_50_69,
                               n_70_79,
                               n_80))

  
  stratified_neuro_counts <- stratified_neuro_counts %>% 
    filter(grepl("Years", variable),
                !code == "NN", 
                !total_n_var == 0) %>% 
         # total_n would be calculating total codes across each age group which is not true reflection of patient counts b/c patients can have more than one code
         #mutate(total_n = sum(total_n_var)) %>% 
         ungroup() %>% 
         left_join(., age_df, by = "variable") %>% 
         mutate(n_var = as.numeric(n_var),
                perc = as.character(round(total_n_var/n_var*100,1)),
                perc = if_else(perc < 0.1, '<0.1', perc),
                perc = paste0(perc, "%")) 
  
  return(stratified_neuro_counts)
  
  
}
stratified_neuro_counts_age_adult <- compute_stratified_neuro_counts_age(stratified_neuro_counts = stratified_neuro_counts_adult,
                                                                         tableone = tableone_adult)

stratified_neuro_counts_age_pediatric <- compute_stratified_neuro_counts_age(stratified_neuro_counts = stratified_neuro_counts_pediatric,
                                                                 tableone = tableone_pediatric)

Diagnoses by Adult and Pediatric Populations

n_0_2 <- get_table_n(tableone_combine, var = "0-2")
#n_3_5 <- get_table_n(tableone_combine, var = "3-5")
n_6_11 <- get_table_n(tableone_combine, var = "6-11")
n_12_17 <- get_table_n(tableone_combine, var = "12-17")

total_ped_age_count = as.numeric(n_0_2) +  as.numeric(n_6_11) + as.numeric(n_12_17)

stratified_neuro_counts_age_pediatric_reformat = stratified_neuro_counts_age_pediatric %>%
  select(variable, pns_cns, description, n_var, total_n_var) %>%
  mutate(variable = "0-17 Years") %>%
  group_by(pns_cns, description) %>%
  mutate(total_n_var = sum(total_n_var, na.rm = TRUE)) %>%
  ungroup() %>%
  # n_var is the total per `age_group` stratification
  select(-n_var) %>%
  distinct() %>%
  mutate(perc = round(total_n_var/total_ped_age_count*100,1),
         perc = if_else(perc < 0.1, '<0.1', as.character(perc)),
         perc = paste0(perc, "%"))

stratified_neuro_counts_age_combined <- stratified_neuro_counts_age_adult %>%
  select(variable, pns_cns, description, total_n_var, perc) %>%
  rbind(., stratified_neuro_counts_age_pediatric_reformat %>%
          select(variable, pns_cns, description, total_n_var, perc))

stratified_neuro_counts_age_combined$variable <- factor(stratified_neuro_counts_age_combined$variable,
                                                                      levels=c("0-17 Years", "18-25 Years", "26-49 Years", "50-69 Years", "70-79 Years", "80+ Years"))
group.colors <- c(NNC = "gray", PNS = "slateblue", CNS ="tomato")

adult_ped_combined_plot <- ggplot(stratified_neuro_counts_age_combined,
                                                aes(x = total_n_var, y = reorder(description,
                                                                                 total_n_var), 
                                                    fill = pns_cns)) +
  geom_bar(stat = "identity") +
  geom_text(
    aes(x = total_n_var, y = description, label = perc),
    hjust = -0.1,
    #hjust="inward",
    size = 6,
    inherit.aes = TRUE) +
  scale_x_continuous(expand = expansion(mult = c(0,0.2))) + # to ensure percent labels are within the figure
  facet_grid(pns_cns ~ variable, scales = "free", space = "free_y") + 
  scale_y_discrete("", position="left", labels = function(x) str_wrap(x, width = 100)) +   
  scale_fill_manual(values = group.colors) +
  xlab("Number of Patients") +
  theme(legend.position = "none",
        strip.text.y = element_text(size = 40),
        strip.text.x = element_text(size = 35),
        axis.text.y.left = element_text(size = 25, face = "bold"),
        axis.text.x = element_text(size = 25),
        axis.title.x = element_text(size = 35))

ggsave("figures/Fig3_diagnoses_by_age_adult_ped_18group.png", adult_ped_combined_plot, width = 40, height = 19, dpi = 300)

Evaluate Diagnoses by Outcome

stratified_neuro_counts_adult$population = "adult"
stratified_neuro_counts_pediatric$population = "pediatric"

stratified_neuro_counts_combine <- rbind(stratified_neuro_counts_adult, stratified_neuro_counts_pediatric)

## determine total number of patients who died or severe
n_death_adult = get_table_n(df = tableone_adult, var = "Deceased") %>% as.numeric()
n_severe_adult = get_table_n(df = tableone_adult, var = "Severe") %>% as.numeric()

n_death_pediatric = get_table_n(df = tableone_pediatric, var = "Deceased") %>% as.numeric()
n_severe_pediatric = get_table_n(df = tableone_pediatric, var = "Severe") %>% as.numeric()

stratified_neuro_counts_outcomes <- stratified_neuro_counts_combine %>% 
  filter(variable %in% c("Severe", "Deceased"),
         !code == "NN", 
         !total_n_var == 0) %>%          
         # total_n would be calculating total codes across each outcome group which is not true reflection of patient counts b/c patients can have more than one code
         #mutate(total_n = sum(total_n_var)) %>% 
         group_by(variable, description, population) %>% 
  mutate(total_n_var = sum(total_n_var)) %>% 
  ungroup() %>% 
  mutate(perc = case_when(variable == "Severe" & population == "adult" ~ round(total_n_var/n_severe_adult*100,1),
                                 variable == "Deceased" & population == "adult" ~ round(total_n_var/n_death_adult*100,1),
                                 variable == "Severe" & population == "pediatric" ~ round(total_n_var/n_severe_pediatric*100,1),
                                 variable == "Deceased" & population == "pediatric" ~ round(total_n_var/n_death_pediatric*100,1)),
         perc = if_else(perc < 0.1, '<0.1', as.character(perc)),
       perc = paste0(perc, "%")) %>% 
  select(-code, -concept_type) %>% 
  distinct()
adult_outcomes = ggplot(stratified_neuro_counts_outcomes %>% 
         filter(population == "adult") %>% 
           mutate(variable = as.factor(variable) %>% 
                                          fct_recode(
                                            Mortality = "Deceased")),
       aes(x = total_n_var, y = reorder(description, total_n_var), fill = pns_cns)) +
  geom_bar(stat = "identity") +
 geom_text(
    aes(x = total_n_var, y = description, label = perc),
    hjust = -0.1,
    #hjust="inward",
    size = 5,
    inherit.aes = TRUE) +
  scale_x_continuous(expand = expansion(mult = c(0,0.2))) + 
  facet_grid(pns_cns ~ variable, scales = "free", space = "free_y") + 
  scale_y_discrete("", position="left", labels = function(x) str_wrap(x, width = 100)) +   
  scale_fill_manual(values = group.colors) +
  xlab("Number of Patients") + 
    theme(legend.position = "none",
        strip.text.y = element_text(size = 35),
        strip.text.x = element_text(size = 30),
        axis.text.y.left = element_text(size = 20, face = "bold"),
        axis.text.x = element_text(size = 15),
        axis.title.x = element_text(size = 35))

ggsave("figures/SuppFig1_diagnoses_by_outcomes_adult.png", adult_outcomes, width = 25, height = 15, dpi = 300)
pediatric_outcomes = ggplot(stratified_neuro_counts_outcomes %>% 
         filter(population == "pediatric") %>% 
           mutate(variable = as.factor(variable) %>% 
                                          fct_recode(
                                            Mortality = "Deceased")),
       aes(x = total_n_var, y = reorder(description, total_n_var), fill = pns_cns)) +
  geom_bar(stat = "identity") +
 geom_text(
    aes(x = total_n_var, y = description, label = perc),
    hjust = -0.1,
    #hjust="inward",
    size = 5,
    inherit.aes = TRUE) +
  scale_x_continuous(expand = expansion(mult = c(0,0.2))) + 
  facet_grid(pns_cns ~ variable, scales = "free") + 
  scale_y_discrete("", position="left", labels = function(x) str_wrap(x, width = 100)) +   
  scale_fill_manual(values = group.colors) +
  xlab("") + 
  theme(legend.position = "none",
        strip.text.y = element_text(size = 35),
        strip.text.x = element_text(size = 30),
        axis.text.y.left = element_text(size = 20, face = "bold"),
        axis.text.x = element_text(size = 15),
        axis.title.x = element_text(size = 35))

ggsave("figures/SuppFig1_diagnoses_by_outcomes_pediatric.png", pediatric_outcomes, width = 25, height = 9, dpi = 300)
#grid.arrange(pediatric_outcomes, adult_outcomes, ncol=1)

ggsave("figures/SuppFig1_diagnoses_by_outcome_combined.png", grid.arrange(pediatric_outcomes, adult_outcomes, ncol=1), width = 25, height = 25, dpi = 300)

Evaluate patients with “Both” CNS and PNS diseases

These patients were excluded from the primary analysis due to potential confounding.

# create empty list to store the counts of "both" patients
both_counts_list <- list()

# for each healthcare system, retrieve the number of excluded "both" patients recorded
for (i in sorted_sites) {
  n_both <- results[[paste(i)]][["first_hosp_results"]][["both_counts"]]
  
  both_counts_list[[i]] <- n_both
}

both_counts <- bind_rows(both_counts_list) %>% t() %>% data.frame() %>% sum()

print(paste(both_counts, "(", round(both_counts/(as.numeric(tableone_combine$N[1])+both_counts)*100,2), "%)")) 
## [1] "1990 ( 1.84 %)"

Comorbidity Analysis

Create comorbidity count table

comorbidity_table_adult <- comorb_table_wide %>%
  select(Comorbidity, population, Comorb_Total, `NNC_N_%`, `CNS_N_%`, `PNS_N_%`) %>%
  filter(population == "Adult") %>% 
  rename(N = "Comorb_Total",
         NNC = "NNC_N_%",
         CNS = "CNS_N_%",
         PNS = "PNS_N_%") %>% 
  mutate(N = paste0(N, " (", round(N/n_adult, 2)*100, "%)"))

comorbidity_table_pediatric <- comorb_table_wide %>%
  select(Comorbidity, population, Comorb_Total, `NNC_N_%`, `CNS_N_%`, `PNS_N_%`) %>%
  filter(population == "Pediatric") %>% 
  rename(N = "Comorb_Total",
         NNC = "NNC_N_%",
         CNS = "CNS_N_%",
         PNS = "PNS_N_%") %>% 
  mutate(N = paste0(N, " (", round(N/n_pediatric, 2)*100, "%)"))

supp_comorbidity_table <- rbind(comorbidity_table_adult, comorbidity_table_pediatric) %>% 
  arrange(Comorbidity)

write.csv(supp_comorbidity_table, "tables/SuppTable_Comorbidity_Counts.csv", row.names = FALSE)

Create vector counts of patients with comorbidity data

neuro_pt_counts_sum_adult <- neuro_pt_counts %>% 
  filter(population == "Adult") %>% 
  select(-population)

neuro_pt_counts_sum_pediatric <- neuro_pt_counts %>% 
  filter(population == "Pediatric") %>% 
  select(-population)

none_n_adult = neuro_pt_counts_sum_adult$None_n
cns_n_adult = neuro_pt_counts_sum_adult$CNS_n
pns_n_adult = neuro_pt_counts_sum_adult$PNS_n

none_n_pediatric = neuro_pt_counts_sum_pediatric$None_n
cns_n_pediatric = neuro_pt_counts_sum_pediatric$CNS_n
pns_n_pediatric = neuro_pt_counts_sum_pediatric$PNS_n

Calculate Relative Risk (RR)

calculate_rr <- function(is_pediatric = FALSE) {
  
  if(is_pediatric == FALSE) {
    cns_n = cns_n_adult
    pns_n = pns_n_adult
    neuro_pt_counts_sum = neuro_pt_counts_sum_adult
    pop = "Adult"
    # create a list of comorbidities
    list_of_comorbs <- comorb_table_wide %>% 
      filter(population == paste(pop)) %>% 
      distinct(Comorbidity) 
    list_of_comorbs <- list_of_comorbs$Comorbidity
    } else  {
    cns_n = cns_n_pediatric
    pns_n = pns_n_pediatric
    neuro_pt_counts_sum = neuro_pt_counts_sum_pediatric
    pop = "Pediatric"
    # create a list of comorbidities
    list_of_comorbs <- comorb_table_wide %>% 
      filter(population == paste(pop)) %>% 
      distinct(Comorbidity) %>% 
      as.vector()
    list_of_comorbs <- list_of_comorbs$Comorbidity
  }
  
  
  # create empty list to save relative risk calculations
  rr_calcs <- list()
  
  # for each comorbidity, we will calculate the relative risk of having a CNS and PNS diagnosis
  for (i in list_of_comorbs) {
  
    # https://www.cdc.gov/csels/dsepd/ss1978/lesson3/section5.html
    # create matrix as:
    # exposed group - with outcome, without outcome
    # non exposed group - with outcome, without outcome
  
    # where variables ending with `_Total` are the total number of CNS, PNS, or NNC patients with the respective comorbidity.
    # variables ending with `_n` are the total numbers in the entire population
  
    # to calculate confidence intervals
  #https://sphweb.bumc.bu.edu/otlt/mph-modules/bs/bs704_confidence_intervals/bs704_confidence_intervals8.html
  
    rr_cns <- comorb_table_wide %>%
      filter(population == paste(pop),
             Comorbidity == paste(i)) %>%
      mutate(a = CNS_Total, # number CNS w/comorb
             b = Comorb_Total - CNS_Total, # number of comorb pts w/o CNS
             c = cns_n - CNS_Total, # number of CNS pts w/o comorb
             d = sum(neuro_pt_counts_sum) - Comorb_Total - c, # number of pts w/o comorb or CNS
             risk_cns = a/(a+b),
             risk_non_cns = c/(c+d),
             rr_CNS = risk_cns/risk_non_cns,
             # for confidence intervals
             ci_1 = (b/a)/(a+b), # (n1-x1)/n1 
             ci_2 = (d/c)/(c+d), # (n2-x2)/n2
             ci_root = sqrt(ci_1 + ci_2),
             ci_lower = log(rr_CNS) - (1.96*ci_root),
             ci_upper = log(rr_CNS) + (1.96*ci_root),
             l_ci_cns = exp(ci_lower),
             u_ci_cns = exp(ci_upper)) %>%
      select(Comorbidity, Comorb_Total, None_Total, CNS_Total, PNS_Total, rr_CNS, l_ci_cns, u_ci_cns)
  
      rr_pns <- comorb_table_wide %>%
      filter(population == paste(pop),
             Comorbidity == paste(i)) %>%
      mutate(a = PNS_Total, # number PNS w/comorb
             b = Comorb_Total - PNS_Total, # number of comorb pts w/o PNS
             c = pns_n - PNS_Total, # number of PNS pts w/o comorb
             d = sum(neuro_pt_counts_sum) - Comorb_Total - c, # number of pts w/o comorb or PNS
             risk_pns = a/(a+b),
             risk_non_pns = c/(c+d),
             rr_PNS = risk_pns/risk_non_pns,
             # for confidence intervals
             ci_1 = (b/a)/(a+b),
             ci_2 = (d/c)/(c+d),
             ci_root = sqrt(ci_1 + ci_2),
             ci_lower = log(rr_PNS) - (1.96*ci_root),
             ci_upper = log(rr_PNS) + (1.96*ci_root),
             l_ci_pns = exp(ci_lower),
             u_ci_pns = exp(ci_upper)) %>%
      select(Comorbidity, rr_PNS, l_ci_pns, u_ci_pns)
  
      rr <- rr_cns %>%
        left_join(., rr_pns, by = c("Comorbidity"))
  
    rr_calcs[[i]] <- rr
  
  }
  
  rr_results <- bind_rows(rr_calcs) %>%
      mutate(rr_CNS = round(rr_CNS, 2),
           l_ci_cns = round(l_ci_cns, 2),
           u_ci_cns = round(u_ci_cns, 2),
           rr_PNS = round(rr_PNS, 2),
           l_ci_pns = round(l_ci_pns, 2),
           u_ci_pns = round(u_ci_pns, 2),
           `RR CNS (95% CI)` = paste(rr_CNS, "(", l_ci_cns, ",", u_ci_cns, ")"),
           `RR PNS (95% CI)` = paste(rr_PNS, "(", l_ci_pns, ",", u_ci_pns, ")")) %>%
    select(Comorbidity, rr_CNS, l_ci_cns, u_ci_cns, `RR CNS (95% CI)`,
           rr_PNS, l_ci_pns, u_ci_pns, `RR PNS (95% CI)`)
  
  # tidy up data
  write.csv(rr_results %>%
              arrange(desc(rr_CNS)) %>%
              select(Comorbidity, `RR CNS (95% CI)`, `RR PNS (95% CI)`), 
            paste0("tables/Supp.Table4_RR_", pop, ".csv"), row.names = FALSE)
  
    # reformat data
  rr_results_tidy_rr <- rr_results %>%
    select(Comorbidity, rr_CNS, rr_PNS) %>%
    pivot_longer(cols = rr_CNS:rr_PNS, names_to = "Neuro Status", values_to = "RR") %>%
    mutate(`Neuro Status` = if_else(`Neuro Status` == "rr_CNS", "CNS", "PNS"))
  
  rr_results_tidy_l_ci <- rr_results %>%
    select(Comorbidity, l_ci_cns, l_ci_pns) %>%
    pivot_longer(cols = l_ci_cns:l_ci_pns, names_to = "Neuro Status", values_to = "l_CI") %>%
    mutate(`Neuro Status` = if_else(`Neuro Status` == "l_ci_cns", "CNS", "PNS"))
  
  rr_results_tidy_u_ci <- rr_results %>%
    select(Comorbidity, u_ci_cns, u_ci_pns) %>%
    pivot_longer(cols = u_ci_cns:u_ci_pns, names_to = "Neuro Status", values_to = "u_CI") %>%
    mutate(`Neuro Status` = if_else(`Neuro Status` == "u_ci_cns", "CNS", "PNS"))
  
  # combine all tidy results
  rr_results_tidy <- rr_results_tidy_rr %>%
    left_join(., rr_results_tidy_l_ci,
                               by = c("Comorbidity", "Neuro Status")) %>%
    left_join(., rr_results_tidy_u_ci,
                               by = c("Comorbidity", "Neuro Status"))
  
  # order by CNS diagnoses with higher RR
  cns_top_results <- rr_results_tidy %>%
    filter(`Neuro Status` == "CNS") %>%
    arrange(RR) %>%
    select(Comorbidity)
  
  rr_results_tidy$Comorbidity <-  ordered(rr_results_tidy$Comorbidity, levels = cns_top_results$Comorbidity)
  
  cns_rr_plot <- ggplot(rr_results_tidy %>%
           filter(`Neuro Status` == "CNS"),
         aes(x = RR, y = Comorbidity)) +
    geom_vline(aes(xintercept = 1), size = .25, linetype = "dashed") +
      geom_errorbarh(aes(xmax = u_CI, xmin = l_CI), size = 0.7, height = 0.2, color = "gray50") +
      geom_point(size = 1.5, color = "tomato") +
      scale_x_continuous(breaks = seq(0, 4, 1), labels = seq(0, 4, 1),
                         limits = c(0,4)) +
    ylab("") +
    xlab("Relative Risk") +
    ggtitle("CNS Patients") + 
    theme(axis.text.y = element_text(face = "bold"))
  
  # arrange by top PNS results
  pns_top_results <- rr_results_tidy %>%
    filter(`Neuro Status` == "PNS") %>%
    arrange(RR) %>%
    select(Comorbidity)
  
  rr_results_tidy$Comorbidity <-  ordered(rr_results_tidy$Comorbidity, levels = pns_top_results$Comorbidity)
  
  
  pns_rr_plot <- ggplot(rr_results_tidy %>%
           filter(`Neuro Status` == "PNS"),
         aes(x = RR, y = Comorbidity)) +
    geom_vline(aes(xintercept = 1), size = .25, linetype = "dashed") +
      geom_errorbarh(aes(xmax = u_CI, xmin = l_CI), size = 0.70, height = 0.2, color = "gray50") +
      geom_point(size = 1.5, color = "slateblue") +
      scale_x_continuous(breaks = seq(0, 1.5, 0.5), labels = seq(0, 1.5, 0.5),
                         limits = c(0,1.5)) +
    ylab("") +
    xlab("Relative Risk") +
    ggtitle("PNS Patients") + 
    theme(axis.text.y = element_text(face = "bold"))
  
  pns_rr_plot <- set_panel_size(pns_rr_plot,
                            width  = unit(7, "cm"),
                            height = unit(6.5, "in"))
  
  cns_rr_plot <- set_panel_size(cns_rr_plot,
                            width  = unit(7, "cm"),
                            height = unit(6.5, "in"))
  
  #grid.arrange(cns_rr_plot, pns_rr_plot, ncol=2)
  
  ggsave(paste0("figures/Figure4.relative_risk_", pop, ".png"), grid.arrange(cns_rr_plot, pns_rr_plot, ncol=2), width = 15, height = 8, dpi = 300)
  
  }

LPCA deviance explained

How much deviance is explained with 10 principal components for 30 comorbidity types?

Across all healthcare systems, 10 principal components explained above 75% of the deviance.

compare_deviance <- function(sorted_sites, is_pediatric=FALSE) {
  
  pca_dev <- list()
  
  if(is_pediatric==FALSE) {
    population = "adults"
    sorted_sites = sorted_sites[!sorted_sites%in% c("GOSH_results", "BCH_results")]
    } else {
    population = "pediatrics"
    }

  # for each healthcare system, we will calculate the total number of patients with each ICD code
  for (i in sorted_sites) {
    print(i)
    tmp <- get(i)
    tmp_pca <- tmp[[c(
      "comorbidities",
      paste0("comorb_", population),
      "deviance_expl"
    )]] %>% 
      data.frame() %>% 
      rename(dev_expl = '.')
    tmp_pca$site <- tmp[["site"]]
    pca_dev[[i]] <- tmp_pca
    
  }
  
  pca_df <- bind_rows(pca_dev)
  
  return(pca_df)
  
}

pca_adult <- compare_deviance(sorted_sites = adult_sites, is_pediatric = FALSE)
## [1] "APHP_results"
## [1] "FRBDX_results"
## [1] "UKFR_results"
## [1] "ICSM_results"
## [1] "HPG23_results"
## [1] "NUH_results"
## [1] "MGB_results"
## [1] "UPENN_results"
## [1] "UMICH_results"
## [1] "NWU_results"
## [1] "UPITT_results"
## [1] "H12O_results"
## [1] "UKY_results"
## [1] "VA1_results"
## [1] "VA2_results"
## [1] "VA3_results"
## [1] "VA4_results"
## [1] "VA5_results"
## [1] "UCLA_results"
pca_data <- bind_rows(pca_adult)


theme_set(theme_classic())

pca_plot <- ggplot(pca_data, 
       aes(x = fct_reorder(site, -dev_expl), y = dev_expl, group = 1)) +
  geom_point(size = 2.5) +
  geom_line() +
  xlab("Healthcare System") + 
  ylab("Proportion of Deviance explained") +  
  ylim(0,1) + 
  theme(axis.text.x=element_text(angle=90, hjust=1, face = "bold", size = 12),
        axis.text.y=element_text(face = "bold"), 
        axis.title.y = element_text(face = "bold", size = 15)); pca_plot

ggsave("figures/SuppFig_pca_deviance.png", pca_plot, height = 6, width = 6)
---
title: "**COVID-19 and Neurological Illness - Patient Characteristics**"
author: "Meg Hutch, Ji Son, Trang Le, Chuan Hong, Xuan Wang, Zahra Shakeri"
date: "04/11/2023"
output:
  html_document:
    toc: true
    toc_float: true
    code_download: true
    theme: spacelab
---

This notebook describes the **106,229** hospitalized COVID-19 positive patients that were studied as part of the [Consortium for Clinical Characterization of COVID-19 by EHR](https://covidclinical.net/) Neurology group.

```{r message=FALSE, warning=FALSE}
library(tidyverse)
library(gridGraphics)
library(gridExtra)
library(ggpubr)
library(DT)
library(kableExtra)
library(cowplot)
library(RColorBrewer)
library(glue)
library(egg)
source("R/plot_theme.R")
source("R/demo_tables.R")
source("R/utils.R")
```

# **Import Data from each Healthcare system**

```{r message=FALSE, warning=FALSE}
# read in files from results folder 
# this folder contains all of the local healthcare system level analyses
rdas <- list.files(
  path = "results",
  pattern = ".rda",
  full.names = TRUE
)


for (rda in rdas) {
  load(rda)
}

rm(rdas, rda)

# create a list of participating healthcare systems from our study tracking spreadsheet
site_google_url <- "https://docs.google.com/spreadsheets/d/1epcYNd_0jCUMktOHf8mz5v651zy1JALD6PgzobrGWDY/edit?usp=sharing"

# load site parameters
site_params <- googlesheets4::read_sheet(site_google_url, sheet = 1)
site_avails <- googlesheets4::read_sheet(site_google_url, sheet = 2)

# filter the list of sites who ran the analysis
sorted_sites <- site_avails %>%
  filter(!is.na(date_v4_received)) %>%
  pull(siteid) %>%
  paste("results", sep = "_")

# list sites without race
sites_wo_race <- site_params %>%
  filter(!include_race) %>%
  pull(siteid)

# combine all rda files with 'results' in name
results <- mget(ls(pattern = "results"))

## load in the pre-processed comorbidity and meta-data
load("processed/comorbidity_table_cnps.rda")
load("processed/neuro_pt_counts.rda") 
```

# **Pre-processing**

**1. Format primary demographic characteristics**

```{r message=FALSE, warning=FALSE}
# create list of hospitals for adult and pediatric analyses
adult_sites = sorted_sites[!sorted_sites %in% c("BCH_results", "GOSH_results")]

pediatric_sites = sorted_sites[!sorted_sites %in% c("VA1_results", "VA2_results", "VA3_results", "VA4_results", "VA5_results")]

demo_table_adult <- create_demo_tableone(sorted_sites = adult_sites, is_pediatric = FALSE)

demo_table_pediatric <- create_demo_tableone(sorted_sites = pediatric_sites, is_pediatric = TRUE)

demo_table_combine <- rbind(demo_table_adult, demo_table_pediatric)
```

**2. Format clinical characteristics (i.e: continuous variables such as median time to discharge)**


```{r message=FALSE, warning=FALSE}
clinical_table_adult <- create_clinical_tableone(sorted_sites = adult_sites, is_pediatric = FALSE)

clinical_table_pediatric <- create_clinical_tableone(sorted_sites = pediatric_sites, is_pediatric = TRUE)

clinical_table_combine <- rbind(clinical_table_adult, clinical_table_pediatric)
```


**3. Conduct additional pre-processing of clinical variables**

```{r message=FALSE, warning=FALSE}
clinical_table_adult_clean <- clean_clinical_tables(clinical_table = clinical_table_adult)

clinical_table_pediatric_clean <- clean_clinical_tables(clinical_table = clinical_table_pediatric)

clinical_table_clean_combine <- rbind(clinical_table_adult_clean, clinical_table_pediatric_clean)
```

**Add Comorbidity data**

```{r}
comorbidity_table_adults <- comorb_table_wide %>%
  filter(population == "Adult") %>% 
  arrange(desc(Comorb_Total)) %>% 
  slice(1:4) # return top comorbidities

comorbidity_table_pediatric <- comorb_table_wide %>%
  filter(population == "Pediatric") %>% 
  arrange(desc(Comorb_Total)) %>% 
  slice(1:4) # return top comorbidities

top_comorb_adults <- comorbidity_table_adults$Comorbidity
top_comorb_pediatrics <- comorbidity_table_pediatric$Comorbidity
```

# **Demographics**

## Table One - Combined

```{r}
neuro_pt_counts_sum <- colSums(neuro_pt_counts[,-1]) %>% 
  data.frame() %>% 
  t() %>% 
  data.frame()

# specify the order of Table 1
row_order_adult <- c(
  "All Patients", "Female", "Male", "Unknown Sex",
  "0-2", "3-5", "6-11", "12-17", "18-25",
  "26-49", "50-69", "70-79", "80+", "Unknown Age",
  "American Indian", "Asian", "Black",
  "Hawaiian/Pacific Islander",
  "Hispanic/Latino", "White", "Other",
  top_comorb_adults, 
  "Median Elixhauser score  (sd) ",
  "Median pre admission cns  (sd) ",
  "Median pre admission pns  (sd) ",
  "Non-Severe", "Severe",
  "Median time to severe  (sd) ",
  "Alive", "Deceased",
  "Median time to death  (sd) ",
  "Discharged", "Not Discharged",
  "Median time to first discharge  (sd) ",
  "Not Readmitted", "Readmitted",
  "Median time to first readmission  (sd) ",
  "Median number of readmissions  (sd) "
  )


tableone_combine <- create_tableone(demo_table = demo_table_combine,
                                    clinical_table = clinical_table_clean_combine) %>%
  rbind(., comorb_table_wide %>%
          group_by(Comorbidity) %>%
          mutate(Comorb_Total = sum(Comorb_Total, na.rm = TRUE),
          None_sum = sum(None_Total, na.rm = TRUE),
          CNS_sum = sum(CNS_Total, na.rm = TRUE),
          PNS_sum = sum(PNS_Total, na.rm = TRUE),
          None_perc = round(None_sum/neuro_pt_counts_sum$None_n*100,1),
          Central_perc = round(CNS_sum/neuro_pt_counts_sum$CNS_n*100,1),
          Peripheral_perc = round(PNS_sum/neuro_pt_counts_sum$PNS_n*100,1),
          None = paste(None_sum, "(", None_perc, ")"),
          Central = paste(CNS_sum, "(", Central_perc, ")"),
          Peripheral = paste(PNS_sum, "(", Peripheral_perc, ")")) %>%
          distinct(Comorbidity, Comorb_Total, None, Central, Peripheral) %>%
          rename(`Table 1` = "Comorbidity",
          N = "Comorb_Total")) %>%
  slice(match(row_order_adult, `Table 1`))

kbl(tableone_combine %>%
      rename("No Neurological Condition (NNC)" = None)) %>%
      add_header_above(c(
      " ",
      "Total Patients" = 1,
      "Neurological Disease" = 3
      )) %>%
      kable_paper("striped", full_width = F) %>%
      pack_rows("Sex", 2, 4) %>%
      pack_rows("Age", 5, 13) %>%
      pack_rows("Race & Ethnicity", 14, 20) %>%
      pack_rows("Past Medical History", 21, 27) %>%
      pack_rows("Severity", 28, 30) %>%
      pack_rows("Survival", 31, 33) %>%
      pack_rows("Discharge", 34, 36) %>%
      pack_rows("Readmission", 37, 40)
write.csv(tableone_combine, 'tables/table1.csv', row.names = FALSE)
```

## Table One - Adults

```{r}
tableone_adult <- create_tableone(demo_table = demo_table_adult,
                                  clinical_table = clinical_table_adult_clean) %>%
                                  rbind(., comorbidity_table_adults %>%
                                          select(Comorbidity, Comorb_Total, `NNC_N_%`, `CNS_N_%`, `PNS_N_%`) %>%
                                          rename(`Table 1` = "Comorbidity",
                                                 N = "Comorb_Total",
                                                 None = "NNC_N_%",
                                                 Central = "CNS_N_%",
                                                 Peripheral = "PNS_N_%")) %>% 
  slice(match(row_order_adult, `Table 1`))

kbl(tableone_adult %>%
    rename("No Neurological Condition (NNC)" = None)) %>%
  add_header_above(c(
  " ",
  "Total Patients" = 1,
  "Neurological Disease" = 3
  )) %>%
  kable_paper("striped", full_width = F) %>%
  pack_rows("Sex", 2, 4) %>%
  pack_rows("Age", 5, 9) %>%
  pack_rows("Race & Ethnicity", 10, 16) %>%
  pack_rows("Past Medical History", 17, 23) %>%
  pack_rows("Severity", 24, 26) %>%
  pack_rows("Survival", 27, 29) %>%
  pack_rows("Discharge", 30, 32) %>%
  pack_rows("Readmission", 33, 36)
```

## Table One - Pediatrics

```{r}
# specify the order of Table 1
row_order_pediatric <- c(
  "All Patients", "Female", "Male", "Unknown Sex",
  "0-2", "3-5", "6-11", "12-17", "18-25",
  "26-49", "50-69", "70-79", "80+", "Unknown Age",
  "American Indian", "Asian", "Black",
  "Hawaiian/Pacific Islander",
  "Hispanic/Latino", "White", "Other",
  top_comorb_pediatrics, 
  "Median Elixhauser score  (sd) ",
  "Median pre admission cns  (sd) ",
  "Median pre admission pns  (sd) ",
  "Non-Severe", "Severe",
  "Median time to severe  (sd) ",
  "Alive", "Deceased",
  "Median time to death  (sd) ",
  "Discharged", "Not Discharged",
  "Median time to first discharge  (sd) ",
  "Not Readmitted", "Readmitted",
  "Median time to first readmission  (sd) ",
  "Median number of readmissions  (sd) "
  )

tableone_pediatric <- create_tableone(demo_table = demo_table_pediatric,
                                      clinical_table = clinical_table_pediatric_clean)  %>%
  rbind(., comorbidity_table_pediatric %>%
          select(Comorbidity, Comorb_Total, `NNC_N_%`, `CNS_N_%`, `PNS_N_%`) %>% 
          rename(`Table 1` = "Comorbidity",
                  N = "Comorb_Total",
                  None = "NNC_N_%",
                  Central = "CNS_N_%",
                  Peripheral = "PNS_N_%")) %>% 
  slice(match(row_order_pediatric, `Table 1`))

kbl(tableone_pediatric %>%
      rename("No Neurological Condition (NNC)" = None)) %>%
  add_header_above(c(
  " ",
  "Total Patients" = 1,
  "Neurological Disease" = 3
  )) %>%
  kable_paper("striped", full_width = F) %>%
  pack_rows("Sex", 2, 4) %>%
  pack_rows("Age", 5, 8) %>%
  pack_rows("Race & Ethnicity", 9, 15) %>%
  pack_rows("Past Medical History", 16, 22) %>%
  pack_rows("Severity", 23, 25) %>%
  pack_rows("Survival", 26, 28) %>%
  pack_rows("Discharge", 29, 31) %>%
  pack_rows("Readmission", 32, 35)
```

**Sample Size**

```{r message=FALSE, warning=FALSE}
n_adult <- as.numeric(tableone_adult$N)[1]
n_pediatric <- as.numeric(tableone_pediatric$N)[1]
```

# **Demographic & Clinical Course Analysis**

Evaluate characteristics among neuro status groups

**1. Conduct chi-squared tests for categorical demographic variables**

```{r}
compute_chi_square <- function(tableone, demo_table) {
  
  N = tableone %>% select(N) %>% head(1) %>% as.numeric()

  # sum of all neuro condition groups
  cns_n = as.numeric(gsub( " .*$", "", tableone[1,"Central"]))
  pns_n = as.numeric(gsub( " .*$", "", tableone[1,"Peripheral"]))
  none_n = as.numeric(gsub( " .*$", "", tableone[1,"None"]))
  
  # sum up the total counts of each demographic variable across healthcare systems
  tableOne_sums <- demo_table %>%
    group_by(variable, Demo_var, Demo_var_i) %>%
    summarise(across(
      starts_with("n_var"),
      function(x) sum(x, na.rm = TRUE)
    ), .groups = "drop")
  
  # create list of demographic/clinical variables
  vars = tableOne_sums$variable
  
  # for binary variables, we should group by category 
  # Thus, only run the analysis for sex.male rather than both sex.female AND sex.male which would be redundant
  # we will also remove `age_group.unknown` since their is only 1 patient here
  exclude <- c("readmitted.false", "sex.female", "sex.other", "survival.deceased", "severity.non-severe",
               "covid_discharged.discharged", "age_group.Unknown")
  vars <- vars[!vars %in% exclude]
  
  # create empty list for chi.square test results
  chi_result_list = list()
  
  # for each variable, we will run the chi.square test
  for(i in vars) {
    test = calc_chisq(i, tableOne_sums, none_n, pns_n, cns_n)
    X2 <- round(test$statistic, 4)
    p_value <- test$p.value
    df <- test$parameter
    Variable = paste(i)
    
    chi_results <- cbind(Variable, X2, p_value, df) %>% 
      data.frame() %>% 
      mutate(p_value = as.numeric(p_value))
    
    rownames(chi_results) <- NULL
    
    chi_result_list[[i]] <- chi_results
  }
  
  # save list of chi.square test results
  chisq_results <- bind_rows(chi_result_list)
  
  return(chisq_results)
  
  
  }
```

**Combined Adult & Pediatric**

```{r}
chisq_combine <- compute_chi_square(tableone = tableone_combine %>%
                                      filter(!`Table 1` == 'Discharged',
                                             !`Table 1` == 'Not Discharged'),
                                    demo_table = demo_table_combine %>%
                                      filter(!variable == 'covid_discharged.discharged',
                                             !variable == 'covid_discharged.not discharged'))

datatable(chisq_combine %>% arrange(p_value))
```

**Adult**

```{r}
chisq_adult <- compute_chi_square(tableone = tableone_adult %>%
                                      filter(!`Table 1` == 'Discharged',
                                             !`Table 1` == 'Not Discharged'),
                                    demo_table = demo_table_adult %>%
                                      filter(!variable == 'covid_discharged.discharged',
                                             !variable == 'covid_discharged.not discharged'))

datatable(chisq_adult %>% arrange(p_value))
```

**Pediatric**

```{r}
chisq_pediatric <- compute_chi_square(tableone = tableone_pediatric %>%
                                      filter(!`Table 1` == 'Discharged',
                                             !`Table 1` == 'Not Discharged'),
                                    demo_table = demo_table_pediatric %>%
                                      filter(!variable == 'covid_discharged.discharged',
                                             !variable == 'covid_discharged.not discharged'))

datatable(chisq_pediatric %>% arrange(p_value))
```

**2. Conduct anova (Kruskal-Wallis) tests for continuous clinical variables**

```{r}
compute_kruskal <- function(clinical_table) {

  # create empty list to save processed variables
  processed_list_figs <- list()
  
  # create a list of clinical variables to further pre-process
  vars_to_process <- unique(clinical_table$name)
  
  # for each variable, we will remove the [min, max] as before
  for (i in vars_to_process) {
    mod_table <- clinical_table %>%
      rename("var" = name) %>%
      filter(var == i) %>%
      mutate(
        site = toupper(site),
        None = sub("(\\(.*|\\[.*)", "", None),
        Peripheral = sub("(\\(.*|\\[.*)", "", Peripheral),
        Central = sub("(\\(.*|\\[.*)", "", Central)
      ) %>%
      mutate(across(None:Central, as.numeric))
  
    processed_list_figs[[i]] <- mod_table
  }
  
  clinical_table_anova <- bind_rows(processed_list_figs)
  
  # format the continuous variables
  cont_vars = clinical_table_anova %>%
    mutate(
      # create a variable without mean/median prefix
      grouped_var = gsub("Mean |Median | \\[Min, Max\\]| \\(SD\\)", "", var)
    ) %>%
    pivot_longer(
      cols = None:Central,
      names_to = "type"
    ) %>%
    mutate(type = factor(type, levels = c("None", "Central", "Peripheral"))) %>% 
    # select variables to analyze
    filter(grouped_var %in% c("Elixhauser score",
                              "pre admission cns", 
                              "pre admission pns", 
                              "time to death", 
                              "time to first discharge", 
                              "time to severe",
                              "number of readmissions",
                              "time to first readmission")) %>% 
    distinct()
  
  # create function to conduct anova
  cont_var_results <- function(variable) {
    
    df <- cont_vars %>% 
    filter(grouped_var == paste(variable))
    
    # kruskal.test calculates the kruskal-Wallis H-statistic 
    # does not assume normality between groups
    # also called the one-way ANOVA on ranks
    test = kruskal.test(df$value ~ df$type)
    
  }
  
  # create empty list for Kruskal-Wallis results
  cont_vars_list = list()
  
  # create list of unique variables
  outcome_cont_vars = unique(cont_vars$grouped_var)
  
  for(i in outcome_cont_vars) {
    
    test = cont_var_results(i)
    X2 <- round(test$statistic, 4)
    p_value <- test$p.value
    df <- test$parameter
    Variable = paste(i)
    
    kw <- cbind(Variable, X2, p_value, df) %>% 
      data.frame() 
    
    rownames(kw) <- NULL
    
    cont_vars_list[[i]] <- kw
  
    
  }
  
  anova_results <- bind_rows(cont_vars_list)
  
  }
```

**Adult & Pediatric Kruskall-Wallis**

```{r message=FALSE, warning=FALSE}

kw_combine <- compute_kruskal(clinical_table = clinical_table_combine)
datatable(kw_combine %>% arrange(p_value))
```

**Adult Clinical Kruskall-Wallis**

*Note: median CNS is 0 for all sites (reason for the NA values)*

```{r message=FALSE, warning=FALSE}

kw_adult <- compute_kruskal(clinical_table = clinical_table_adult)
datatable(kw_adult %>% arrange(p_value))
```

**Pediatric Kruskall-Wallis**

```{r message=FALSE, warning=FALSE}

kw_pediatric <- compute_kruskal(clinical_table = clinical_table_pediatric)
datatable(kw_pediatric %>% arrange(p_value))
```

**3. Combine chi-square and Kruskal-Wallis results**

We will evaluate significance when controlling for False Discovery Rate (FDR) 

```{r}
tableone_stats <- function(chisq_results, anova_results, population) {

  demo_stats <- rbind(chisq_results %>% select(Variable, p_value),
                      anova_results %>% select(Variable, p_value))
  
  # adjust p-values with FDR
  demo_stats$adj_p_value <- p.adjust(demo_stats$p_value, method = "fdr")
  
  # round & format p-value 
  demo_stats <- demo_stats %>% 
    mutate(p_value = as.numeric(p_value),
           pvalue = if_else(p_value < 0.001, "< 0.001", paste(round(p_value, 3))),
           adj.p_value = if_else(adj_p_value < 0.001, "< 0.001", paste(round(adj_p_value, 3))))
  
  # tidy up the demographics stat table
  demo_stats_tidy <- demo_stats %>% 
    rename(`Unadjusted P-value (raw)` = p_value,
           `FDR Adjusted P-value (raw)` = adj_p_value,
           `Unadjusted P-value (tidy)` = pvalue,
           `FDR Adjusted P-value (tidy)` = adj.p_value) %>% 
    select(Variable, `Unadjusted P-value (raw)`, `FDR Adjusted P-value (raw)`, `Unadjusted P-value (tidy)`, `FDR Adjusted P-value (tidy)`)

  write.csv(demo_stats_tidy, paste0("tables/Table1_pvals_", population, ".csv"), row.names = FALSE)
  
  return(demo_stats_tidy)
  
}
```

**Incorporate Comorbidities**

```{r}
# define matrix for each comorbidity
comorb_matrix <- function(comorbidity_table, comorbidity, population) {

    if(population=='Combined') {
      comorbidity_table = comorbidity_table
    } else if (population=="Adult") {
      comorbidity_table = comorbidity_table %>% 
        filter(population == "Adult")
    } else if (population=="Pediatric") {
      comorbidity_table = comorbidity_table %>% 
        filter(population == "Pediatric")
    } else {
      print('incorrect population specification')
    }
  
  mat_comorb <- comorbidity_table %>%
    filter(Comorbidity == paste(comorbidity)) %>%
    group_by(Comorbidity) %>% 
    mutate(None_comorb = sum(None_Total, na.rm = TRUE),
    CNS_comorb = sum(CNS_Total, na.rm = TRUE),
    PNS_comorb = sum(PNS_Total, na.rm = TRUE)) %>%
    ungroup() %>%
    distinct(None_comorb, CNS_comorb, PNS_comorb) %>%
    mutate(none_dif = neuro_pt_counts_sum$None_n - None_comorb,
    pns_dif = neuro_pt_counts_sum$PNS_n - PNS_comorb,
    cns_dif = neuro_pt_counts_sum$CNS_n - CNS_comorb) %>%
    data.frame() %>%
    #as.integer() %>%
    matrix(nrow = 2, ncol = 3, byrow = TRUE)

  comorb_results <- chisq.test(unlist(mat_comorb))
  
  results_df <- data.frame(Variable = paste(comorbidity),
                                X2 = round(comorb_results$statistic, 4),
                                p_value = comorb_results$p.value,
                                df = comorb_results$parameter)
  row.names(results_df) <- NULL
  
  return(results_df)
}

## combined comorbidities
comorbidity_combined_chi_list <- list()

for(i in top_comorb_adults) {
  
  comorb_chi_results <- comorb_matrix(comorbidity_table = comorb_table_wide, i, population = "Combined")
  comorbidity_combined_chi_list[[i]] <- comorb_chi_results
}

comorbidity_combined_chi <- comorbidity_combined_chi_list %>%
  bind_rows() 

chisq_combine_comorb <- chisq_combine %>%
  mutate(X2 = as.numeric(X2),
  df = as.numeric(df)) %>%
  bind_rows(comorbidity_combined_chi) 



## adult comorbidities
comorbidity_adult_chi_list <- list()

for(i in top_comorb_adults) {
  
  comorb_chi_results <- comorb_matrix(comorbidity_table = comorb_table_wide, i, population = "Adult")
  comorbidity_adult_chi_list[[i]] <- comorb_chi_results
}

comorbidity_adult_chi <- comorbidity_adult_chi_list %>%
  bind_rows() 

chisq_adult_comorb <- chisq_adult %>%
  mutate(X2 = as.numeric(X2),
  df = as.numeric(df)) %>%
  bind_rows(comorbidity_adult_chi) 

## pediatric comorbidities
comorbidity_pediatric_chi_list <- list()

for(i in top_comorb_pediatrics) {
  
  comorb_chi_results <- comorb_matrix(comorbidity_table = comorb_table_wide, i, population = "Pediatric")
  comorbidity_pediatric_chi_list[[i]] <- comorb_chi_results
}

comorbidity_pediatric_chi <- comorbidity_pediatric_chi_list %>%
  bind_rows() 

chisq_pediatric_comorb <- chisq_pediatric %>%
  mutate(X2 = as.numeric(X2),
  df = as.numeric(df)) %>%
  bind_rows(comorbidity_pediatric_chi_list) 
```

## Table One - Combined Analysis

```{r}
combine_tableone_stats <- tableone_stats(chisq_results = chisq_combine_comorb,
                                         anova_results = kw_combine,
                                         population = "combined") 
  

datatable(combine_tableone_stats)
```


## Table One - Adult Analysis

```{r}
adult_tableone_stats <- tableone_stats(chisq_results = chisq_adult_comorb,
                                         anova_results = kw_adult,
                                         population = "adult") 
  

datatable(adult_tableone_stats)
```

## Table One - Pediatric Analysis

```{r}
pediatric_tableone_stats <- tableone_stats(chisq_results = chisq_pediatric_comorb,
                                         anova_results = kw_pediatric,
                                         population = "pediatric") 
  

datatable(pediatric_tableone_stats)
```

# **Neurological Diagnosis Analysis**

Evaluate CNS and PNS neurological diagnoses in our patient cohort

**Format Diagnostic code data**

```{r fig.width=12}
# import list of ICD-9/ICD-10 codes
neuro_icds_10 <-
  readxl::read_excel("public-data/2020-02-22_neuro-icd10_CNSvPNS.xlsx", sheet = 2) %>% 
  rename(
    "icd" = `ICD-10`,
    "pns_cns" = `Nervous system Involvement (1=central, 2=peripheral, 3=irrelevant)`,
    "type" = `ICD-10_type (1=first three alphanumeric code only, 2=digits after decimal point)`) %>%
  filter(type == 1) %>% 
  mutate(pns_cns = as.factor(pns_cns) %>% fct_recode(
    Central = "1",
    Peripheral = "2"
  )) %>%
  distinct(icd, `Neurological Disease Category`, pns_cns, `ICD-10 Description Revised`) %>%
  rename("description" = `ICD-10 Description Revised`) %>%
  mutate(concept_type = "icd-10")

neuro_icds_9 <- read.csv("public-data/icd9_tab_CNSvPNS.csv") %>%
  rename(
    "Neurological Disease Category" = "Neurological.Disease.Category",
    "pns_cns" = `Nervous.system.Involvement..1.central..2.peripheral.`,
    "icd_description" = `icd9_desc_revised`
  ) %>%
  mutate(
    pns_cns = as.factor(pns_cns) %>% fct_recode(
      Central = "1",
      Peripheral = "2"
    ),
    concept_type = "DIAG-ICD9"
  ) %>%
  select(icd, `Neurological Disease Category`, pns_cns, icd_description) %>%
  rename("description" = icd_description) %>%
  mutate(concept_type = "icd-9")

# bind list of ICD-9/ICD-10 codes
icds <- rbind(neuro_icds_10, neuro_icds_9)
```

**Calculate total number of neuro diagnoses for each adults & pediatrics**

```{r}
compute_neuro_counts <- function(sorted_sites, sample_size, is_pediatric = FALSE) {
  
    # create empty list to save processed diagnostic data
    diag_table_list <- list()
  
    if(is_pediatric==FALSE) {
      population = "adults"
      } else {
      population = "pediatrics"
      }
  
  # for each healthcare system, we will calculate the total number of patients with each ICD code
  for (i in sorted_sites) {
    tmp <- get(i)
    tmp_diag <- tmp[[c(
      "first_hosp_results",
      "icd_tables",
      paste0("icd_tables_", population),
      "demo_table"
    )]]
    tmp_diag$site <- tmp[["site"]]
    diag_table_list[[i]] <- tmp_diag %>%
      filter(variable == "sex.female" | variable == "sex.male" | variable == "sex.other") %>%
      select(site, contains("n_var"))
  }
  
  diag_table_wide <- bind_rows(diag_table_list)
  
  # we will count the total number and percent across healthcare systems
  diag_counts <- colSums(Filter(is.numeric, diag_table_wide), na.rm = TRUE) %>% 
    data.frame() %>% 
    mutate(perc = round(./sample_size * 100, 1))
  
  # format diagnostic table
  diag_counts <- tibble::rownames_to_column(diag_counts, "icd") %>%
    mutate(icd = gsub("n_var_", "", icd)) %>%
    rename(`Number of Patients` = ".") %>%
    full_join(., icds, by = "icd") %>%
    dplyr::arrange(desc(`Number of Patients`)) %>% 
    mutate(`Number of Patients (% of Cohort)` = paste(`Number of Patients`, "(", perc, ")", sep = " ")) %>% 
    select(pns_cns, `Neurological Disease Category`, concept_type, 
           icd, description, `Number of Patients (% of Cohort)`) %>% 
      rename(Code = "icd",
           `Nervous System` = "pns_cns",
           Description = "description",
           `ICD Version` = "concept_type") %>% 
    mutate(`Number of Patients (% of Cohort)` = if_else(`Number of Patients (% of Cohort)` == "NA ( NA )", "0 ( 0 )", `Number of Patients (% of Cohort)`),
           Code = if_else(Code == "NN", "No Neurological Disease (NNC)", Code))
  
  return(diag_counts)
  
  }

```

```{r}
neuro_counts_adult <- compute_neuro_counts(sorted_sites = adult_sites,
                                           sample_size = n_adult, 
                                           is_pediatric = FALSE)

neuro_counts_pediatric <- compute_neuro_counts(sorted_sites = pediatric_sites,
                                           sample_size = n_pediatric, 
                                           is_pediatric = TRUE)

neuro_counts_combined <- neuro_counts_adult %>% 
  rename(`Number of Adult Patients (% of Cohort)` = `Number of Patients (% of Cohort)`) %>% 
  left_join(., neuro_counts_pediatric %>% 
              select(`ICD Version`, Code, `Number of Patients (% of Cohort)`) %>% 
              rename("Number of Pediatric Patients (% of Cohort`)" = `Number of Patients (% of Cohort)`))

write.csv(neuro_counts_combined, paste0("tables/Supp.Table2_diagnoses", ".csv"), row.names = FALSE)
  
```

```{r}
datatable(neuro_counts_adult, caption = "Number (%) of Adult Patients with each Neurological Diagnosis") 
```

```{r}
datatable(neuro_counts_pediatric, caption = "Number (%) of Pediatric Patients with each Neurological Diagnosis") 
```

## Evaluate Diagnoses by Age & Outcome

```{r}
compute_stratified_neuro_counts <- function(tableone, sorted_sites, is_pediatric = FALSE) {
  
  # create empty list to save processed diagnostic data
    diag_table_list <- list()
  
    if(is_pediatric==FALSE) {
      population = "adults"
      } else {
      population = "pediatrics"
      }
  
  # for each healthcare system, we will calculate the total number of patients with each ICD code
  for (i in sorted_sites) {
    #print(i)
    tmp <- get(i)
    tmp_diag <- tmp[[c(
      "first_hosp_results",
      "icd_tables",
      paste0("icd_tables_", population),
      "demo_table"
    )]]
    tmp_diag$site <- tmp[["site"]]
    try(
    diag_table_list[[i]] <- tmp_diag %>%
      select(site, variable, starts_with("n_var")) %>% 
      pivot_longer(cols = starts_with("n_var"), names_to = "code", values_to = "freq") %>% 
      ungroup()
    )
  }

  diag_table_wide_all <- bind_rows(diag_table_list) %>% 
      group_by(variable, code) %>% 
    # total number of patients who had each code for each variable across sites
      mutate(total_n_var  = sum(freq, na.rm = TRUE)) %>% 
    distinct(variable, code, total_n_var) %>% 
    mutate(code = gsub("n_var_", "", code),
           variable = gsub("Severity.", "", variable),
           variable = gsub("Survival.", "", variable),
           variable = gsub("readmitted", "", variable),
           variable = gsub("age_group", "", variable),
           variable = gsub("covid_discharged", "", variable),
           variable = if_else(variable == ".TRUE", "Readmitted", variable)) %>% 
    left_join(., icds %>% rename(code = "icd"), by = "code") %>% 
    ungroup() %>% 
    filter(!concept_type == "icd-9")


  # refactor neuro status
  diag_table_wide_all$pns_cns <- factor(diag_table_wide_all$pns_cns,
                                        levels=c("Central", "Peripheral"),
                                        labels=c("CNS", "PNS"))
  
  
  return(diag_table_wide_all)
  
  }
```

```{r}
stratified_neuro_counts_adult <- compute_stratified_neuro_counts(sorted_sites = adult_sites,
                                                                 is_pediatric = FALSE,
                                                                 tableone = tableone_adult)

stratified_neuro_counts_pediatric <- compute_stratified_neuro_counts(sorted_sites = pediatric_sites,
                                                                 is_pediatric = TRUE,
                                                                 tableone = tableone_pediatric)
```


```{r}
compute_stratified_neuro_counts_age <- function(stratified_neuro_counts, tableone) {
  
  stratified_neuro_counts$variable <- factor(stratified_neuro_counts$variable, 
                                     levels=c(".00to02", ".03to05", ".06to11", ".12to17",
                                              ".18to25", ".26to49", ".50to69", ".70to79", ".80plus"), 
                                     labels=c("0-2 Years", "3-5 Years", "6-11 Years", "12-17 Years",
                                              "18-25 Years", "26-49 Years", "50-69 Years", "70-79 Years", "80+ Years"))
  
  # total counts
  n_0_2 <- get_table_n(tableone, var = "0-2")
  n_3_5 = get_table_n(tableone, var = "3-5")
  n_6_11 = get_table_n(tableone, var = "6-11")
  n_12_17 = get_table_n(tableone, var = "12-17")
  n_18_25 = get_table_n(tableone, var = "18-25")
  n_26_49 = get_table_n(tableone, var = "26-49")
  n_50_69 = get_table_n(tableone, var = "50-69")
  n_70_79 = get_table_n(tableone, var = "70-79")
  n_80 = get_table_n(tableone, var = "80+")
  
  age_df <- data.frame("variable" = c("0-2 Years",
                                    "3-5 Years", 
                                    "6-11 Years", 
                                    "12-17 Years",
                                    "18-25 Years", 
                                    "26-49 Years", 
                                    "50-69 Years", 
                                    "70-79 Years", 
                                    "80+ Years"),
                     "n_var" = c(n_0_2,
                               n_3_5,
                               n_6_11,
                               n_12_17,
                               n_18_25,
                               n_26_49,
                               n_50_69,
                               n_70_79,
                               n_80))

  
  stratified_neuro_counts <- stratified_neuro_counts %>% 
    filter(grepl("Years", variable),
                !code == "NN", 
                !total_n_var == 0) %>% 
         # total_n would be calculating total codes across each age group which is not true reflection of patient counts b/c patients can have more than one code
         #mutate(total_n = sum(total_n_var)) %>% 
         ungroup() %>% 
         left_join(., age_df, by = "variable") %>% 
         mutate(n_var = as.numeric(n_var),
                perc = as.character(round(total_n_var/n_var*100,1)),
                perc = if_else(perc < 0.1, '<0.1', perc),
                perc = paste0(perc, "%")) 
  
  return(stratified_neuro_counts)
  
  
}


```

```{r}
stratified_neuro_counts_age_adult <- compute_stratified_neuro_counts_age(stratified_neuro_counts = stratified_neuro_counts_adult,
                                                                         tableone = tableone_adult)

stratified_neuro_counts_age_pediatric <- compute_stratified_neuro_counts_age(stratified_neuro_counts = stratified_neuro_counts_pediatric,
                                                                 tableone = tableone_pediatric)
```

**Diagnoses by Adult and Pediatric Populations**

```{r fig.height=20, fig.width=15}
n_0_2 <- get_table_n(tableone_combine, var = "0-2")
#n_3_5 <- get_table_n(tableone_combine, var = "3-5")
n_6_11 <- get_table_n(tableone_combine, var = "6-11")
n_12_17 <- get_table_n(tableone_combine, var = "12-17")

total_ped_age_count = as.numeric(n_0_2) +  as.numeric(n_6_11) + as.numeric(n_12_17)

stratified_neuro_counts_age_pediatric_reformat = stratified_neuro_counts_age_pediatric %>%
  select(variable, pns_cns, description, n_var, total_n_var) %>%
  mutate(variable = "0-17 Years") %>%
  group_by(pns_cns, description) %>%
  mutate(total_n_var = sum(total_n_var, na.rm = TRUE)) %>%
  ungroup() %>%
  # n_var is the total per `age_group` stratification
  select(-n_var) %>%
  distinct() %>%
  mutate(perc = round(total_n_var/total_ped_age_count*100,1),
         perc = if_else(perc < 0.1, '<0.1', as.character(perc)),
         perc = paste0(perc, "%"))

stratified_neuro_counts_age_combined <- stratified_neuro_counts_age_adult %>%
  select(variable, pns_cns, description, total_n_var, perc) %>%
  rbind(., stratified_neuro_counts_age_pediatric_reformat %>%
          select(variable, pns_cns, description, total_n_var, perc))

stratified_neuro_counts_age_combined$variable <- factor(stratified_neuro_counts_age_combined$variable,
                                                                      levels=c("0-17 Years", "18-25 Years", "26-49 Years", "50-69 Years", "70-79 Years", "80+ Years"))
```

```{r fig.height=20, fig.width=12}
group.colors <- c(NNC = "gray", PNS = "slateblue", CNS ="tomato")

adult_ped_combined_plot <- ggplot(stratified_neuro_counts_age_combined,
                                                aes(x = total_n_var, y = reorder(description,
                                                                                 total_n_var), 
                                                    fill = pns_cns)) +
  geom_bar(stat = "identity") +
  geom_text(
    aes(x = total_n_var, y = description, label = perc),
    hjust = -0.1,
    #hjust="inward",
    size = 6,
    inherit.aes = TRUE) +
  scale_x_continuous(expand = expansion(mult = c(0,0.2))) + # to ensure percent labels are within the figure
  facet_grid(pns_cns ~ variable, scales = "free", space = "free_y") + 
  scale_y_discrete("", position="left", labels = function(x) str_wrap(x, width = 100)) +   
  scale_fill_manual(values = group.colors) +
  xlab("Number of Patients") +
  theme(legend.position = "none",
        strip.text.y = element_text(size = 40),
        strip.text.x = element_text(size = 35),
        axis.text.y.left = element_text(size = 25, face = "bold"),
        axis.text.x = element_text(size = 25),
        axis.title.x = element_text(size = 35))

ggsave("figures/Fig3_diagnoses_by_age_adult_ped_18group.png", adult_ped_combined_plot, width = 40, height = 19, dpi = 300)
```

![](figures/Fig3_diagnoses_by_age_adult_ped_18group.png){#id .class width=1000 height=800px}

## Evaluate Diagnoses by Outcome

```{r}

stratified_neuro_counts_adult$population = "adult"
stratified_neuro_counts_pediatric$population = "pediatric"

stratified_neuro_counts_combine <- rbind(stratified_neuro_counts_adult, stratified_neuro_counts_pediatric)

## determine total number of patients who died or severe
n_death_adult = get_table_n(df = tableone_adult, var = "Deceased") %>% as.numeric()
n_severe_adult = get_table_n(df = tableone_adult, var = "Severe") %>% as.numeric()

n_death_pediatric = get_table_n(df = tableone_pediatric, var = "Deceased") %>% as.numeric()
n_severe_pediatric = get_table_n(df = tableone_pediatric, var = "Severe") %>% as.numeric()

stratified_neuro_counts_outcomes <- stratified_neuro_counts_combine %>% 
  filter(variable %in% c("Severe", "Deceased"),
         !code == "NN", 
         !total_n_var == 0) %>%          
         # total_n would be calculating total codes across each outcome group which is not true reflection of patient counts b/c patients can have more than one code
         #mutate(total_n = sum(total_n_var)) %>% 
         group_by(variable, description, population) %>% 
  mutate(total_n_var = sum(total_n_var)) %>% 
  ungroup() %>% 
  mutate(perc = case_when(variable == "Severe" & population == "adult" ~ round(total_n_var/n_severe_adult*100,1),
                                 variable == "Deceased" & population == "adult" ~ round(total_n_var/n_death_adult*100,1),
                                 variable == "Severe" & population == "pediatric" ~ round(total_n_var/n_severe_pediatric*100,1),
                                 variable == "Deceased" & population == "pediatric" ~ round(total_n_var/n_death_pediatric*100,1)),
         perc = if_else(perc < 0.1, '<0.1', as.character(perc)),
       perc = paste0(perc, "%")) %>% 
  select(-code, -concept_type) %>% 
  distinct()
```

```{r}
adult_outcomes = ggplot(stratified_neuro_counts_outcomes %>% 
         filter(population == "adult") %>% 
           mutate(variable = as.factor(variable) %>% 
                                          fct_recode(
                                            Mortality = "Deceased")),
       aes(x = total_n_var, y = reorder(description, total_n_var), fill = pns_cns)) +
  geom_bar(stat = "identity") +
 geom_text(
    aes(x = total_n_var, y = description, label = perc),
    hjust = -0.1,
    #hjust="inward",
    size = 5,
    inherit.aes = TRUE) +
  scale_x_continuous(expand = expansion(mult = c(0,0.2))) + 
  facet_grid(pns_cns ~ variable, scales = "free", space = "free_y") + 
  scale_y_discrete("", position="left", labels = function(x) str_wrap(x, width = 100)) +   
  scale_fill_manual(values = group.colors) +
  xlab("Number of Patients") + 
    theme(legend.position = "none",
        strip.text.y = element_text(size = 35),
        strip.text.x = element_text(size = 30),
        axis.text.y.left = element_text(size = 20, face = "bold"),
        axis.text.x = element_text(size = 15),
        axis.title.x = element_text(size = 35))

ggsave("figures/SuppFig1_diagnoses_by_outcomes_adult.png", adult_outcomes, width = 25, height = 15, dpi = 300)
```

```{r}
pediatric_outcomes = ggplot(stratified_neuro_counts_outcomes %>% 
         filter(population == "pediatric") %>% 
           mutate(variable = as.factor(variable) %>% 
                                          fct_recode(
                                            Mortality = "Deceased")),
       aes(x = total_n_var, y = reorder(description, total_n_var), fill = pns_cns)) +
  geom_bar(stat = "identity") +
 geom_text(
    aes(x = total_n_var, y = description, label = perc),
    hjust = -0.1,
    #hjust="inward",
    size = 5,
    inherit.aes = TRUE) +
  scale_x_continuous(expand = expansion(mult = c(0,0.2))) + 
  facet_grid(pns_cns ~ variable, scales = "free") + 
  scale_y_discrete("", position="left", labels = function(x) str_wrap(x, width = 100)) +   
  scale_fill_manual(values = group.colors) +
  xlab("") + 
  theme(legend.position = "none",
        strip.text.y = element_text(size = 35),
        strip.text.x = element_text(size = 30),
        axis.text.y.left = element_text(size = 20, face = "bold"),
        axis.text.x = element_text(size = 15),
        axis.title.x = element_text(size = 35))

ggsave("figures/SuppFig1_diagnoses_by_outcomes_pediatric.png", pediatric_outcomes, width = 25, height = 9, dpi = 300)
```


```{r eval=FALSE, include=TRUE}
#grid.arrange(pediatric_outcomes, adult_outcomes, ncol=1)

ggsave("figures/SuppFig1_diagnoses_by_outcome_combined.png", grid.arrange(pediatric_outcomes, adult_outcomes, ncol=1), width = 25, height = 25, dpi = 300)
```

![](figures/SuppFig1_diagnoses_by_outcome_combined.png){#id .class width=800 height=800px}

## Evaluate patients with "Both" CNS and PNS diseases

These patients were excluded from the primary analysis due to potential confounding.

```{r}
# create empty list to store the counts of "both" patients
both_counts_list <- list()

# for each healthcare system, retrieve the number of excluded "both" patients recorded
for (i in sorted_sites) {
  n_both <- results[[paste(i)]][["first_hosp_results"]][["both_counts"]]
  
  both_counts_list[[i]] <- n_both
}

both_counts <- bind_rows(both_counts_list) %>% t() %>% data.frame() %>% sum()

print(paste(both_counts, "(", round(both_counts/(as.numeric(tableone_combine$N[1])+both_counts)*100,2), "%)")) 
```

# **Comorbidity Analysis**

Create comorbidity count table

```{r}
comorbidity_table_adult <- comorb_table_wide %>%
  select(Comorbidity, population, Comorb_Total, `NNC_N_%`, `CNS_N_%`, `PNS_N_%`) %>%
  filter(population == "Adult") %>% 
  rename(N = "Comorb_Total",
         NNC = "NNC_N_%",
         CNS = "CNS_N_%",
         PNS = "PNS_N_%") %>% 
  mutate(N = paste0(N, " (", round(N/n_adult, 2)*100, "%)"))

comorbidity_table_pediatric <- comorb_table_wide %>%
  select(Comorbidity, population, Comorb_Total, `NNC_N_%`, `CNS_N_%`, `PNS_N_%`) %>%
  filter(population == "Pediatric") %>% 
  rename(N = "Comorb_Total",
         NNC = "NNC_N_%",
         CNS = "CNS_N_%",
         PNS = "PNS_N_%") %>% 
  mutate(N = paste0(N, " (", round(N/n_pediatric, 2)*100, "%)"))

supp_comorbidity_table <- rbind(comorbidity_table_adult, comorbidity_table_pediatric) %>% 
  arrange(Comorbidity)

write.csv(supp_comorbidity_table, "tables/SuppTable_Comorbidity_Counts.csv", row.names = FALSE)
```

Create vector counts of patients with comorbidity data 

```{r}
neuro_pt_counts_sum_adult <- neuro_pt_counts %>% 
  filter(population == "Adult") %>% 
  select(-population)

neuro_pt_counts_sum_pediatric <- neuro_pt_counts %>% 
  filter(population == "Pediatric") %>% 
  select(-population)

none_n_adult = neuro_pt_counts_sum_adult$None_n
cns_n_adult = neuro_pt_counts_sum_adult$CNS_n
pns_n_adult = neuro_pt_counts_sum_adult$PNS_n

none_n_pediatric = neuro_pt_counts_sum_pediatric$None_n
cns_n_pediatric = neuro_pt_counts_sum_pediatric$CNS_n
pns_n_pediatric = neuro_pt_counts_sum_pediatric$PNS_n
```

### Calculate Relative Risk (RR)

```{r}
calculate_rr <- function(is_pediatric = FALSE) {
  
  if(is_pediatric == FALSE) {
    cns_n = cns_n_adult
    pns_n = pns_n_adult
    neuro_pt_counts_sum = neuro_pt_counts_sum_adult
    pop = "Adult"
    # create a list of comorbidities
    list_of_comorbs <- comorb_table_wide %>% 
      filter(population == paste(pop)) %>% 
      distinct(Comorbidity) 
    list_of_comorbs <- list_of_comorbs$Comorbidity
    } else  {
    cns_n = cns_n_pediatric
    pns_n = pns_n_pediatric
    neuro_pt_counts_sum = neuro_pt_counts_sum_pediatric
    pop = "Pediatric"
    # create a list of comorbidities
    list_of_comorbs <- comorb_table_wide %>% 
      filter(population == paste(pop)) %>% 
      distinct(Comorbidity) %>% 
      as.vector()
    list_of_comorbs <- list_of_comorbs$Comorbidity
  }
  
  
  # create empty list to save relative risk calculations
  rr_calcs <- list()
  
  # for each comorbidity, we will calculate the relative risk of having a CNS and PNS diagnosis
  for (i in list_of_comorbs) {
  
    # https://www.cdc.gov/csels/dsepd/ss1978/lesson3/section5.html
    # create matrix as:
    # exposed group - with outcome, without outcome
    # non exposed group - with outcome, without outcome
  
    # where variables ending with `_Total` are the total number of CNS, PNS, or NNC patients with the respective comorbidity.
    # variables ending with `_n` are the total numbers in the entire population
  
    # to calculate confidence intervals
  #https://sphweb.bumc.bu.edu/otlt/mph-modules/bs/bs704_confidence_intervals/bs704_confidence_intervals8.html
  
    rr_cns <- comorb_table_wide %>%
      filter(population == paste(pop),
             Comorbidity == paste(i)) %>%
      mutate(a = CNS_Total, # number CNS w/comorb
             b = Comorb_Total - CNS_Total, # number of comorb pts w/o CNS
             c = cns_n - CNS_Total, # number of CNS pts w/o comorb
             d = sum(neuro_pt_counts_sum) - Comorb_Total - c, # number of pts w/o comorb or CNS
             risk_cns = a/(a+b),
             risk_non_cns = c/(c+d),
             rr_CNS = risk_cns/risk_non_cns,
             # for confidence intervals
             ci_1 = (b/a)/(a+b), # (n1-x1)/n1 
             ci_2 = (d/c)/(c+d), # (n2-x2)/n2
             ci_root = sqrt(ci_1 + ci_2),
             ci_lower = log(rr_CNS) - (1.96*ci_root),
             ci_upper = log(rr_CNS) + (1.96*ci_root),
             l_ci_cns = exp(ci_lower),
             u_ci_cns = exp(ci_upper)) %>%
      select(Comorbidity, Comorb_Total, None_Total, CNS_Total, PNS_Total, rr_CNS, l_ci_cns, u_ci_cns)
  
      rr_pns <- comorb_table_wide %>%
      filter(population == paste(pop),
             Comorbidity == paste(i)) %>%
      mutate(a = PNS_Total, # number PNS w/comorb
             b = Comorb_Total - PNS_Total, # number of comorb pts w/o PNS
             c = pns_n - PNS_Total, # number of PNS pts w/o comorb
             d = sum(neuro_pt_counts_sum) - Comorb_Total - c, # number of pts w/o comorb or PNS
             risk_pns = a/(a+b),
             risk_non_pns = c/(c+d),
             rr_PNS = risk_pns/risk_non_pns,
             # for confidence intervals
             ci_1 = (b/a)/(a+b),
             ci_2 = (d/c)/(c+d),
             ci_root = sqrt(ci_1 + ci_2),
             ci_lower = log(rr_PNS) - (1.96*ci_root),
             ci_upper = log(rr_PNS) + (1.96*ci_root),
             l_ci_pns = exp(ci_lower),
             u_ci_pns = exp(ci_upper)) %>%
      select(Comorbidity, rr_PNS, l_ci_pns, u_ci_pns)
  
      rr <- rr_cns %>%
        left_join(., rr_pns, by = c("Comorbidity"))
  
    rr_calcs[[i]] <- rr
  
  }
  
  rr_results <- bind_rows(rr_calcs) %>%
      mutate(rr_CNS = round(rr_CNS, 2),
           l_ci_cns = round(l_ci_cns, 2),
           u_ci_cns = round(u_ci_cns, 2),
           rr_PNS = round(rr_PNS, 2),
           l_ci_pns = round(l_ci_pns, 2),
           u_ci_pns = round(u_ci_pns, 2),
           `RR CNS (95% CI)` = paste(rr_CNS, "(", l_ci_cns, ",", u_ci_cns, ")"),
           `RR PNS (95% CI)` = paste(rr_PNS, "(", l_ci_pns, ",", u_ci_pns, ")")) %>%
    select(Comorbidity, rr_CNS, l_ci_cns, u_ci_cns, `RR CNS (95% CI)`,
           rr_PNS, l_ci_pns, u_ci_pns, `RR PNS (95% CI)`)
  
  # tidy up data
  write.csv(rr_results %>%
              arrange(desc(rr_CNS)) %>%
              select(Comorbidity, `RR CNS (95% CI)`, `RR PNS (95% CI)`), 
            paste0("tables/Supp.Table4_RR_", pop, ".csv"), row.names = FALSE)
  
    # reformat data
  rr_results_tidy_rr <- rr_results %>%
    select(Comorbidity, rr_CNS, rr_PNS) %>%
    pivot_longer(cols = rr_CNS:rr_PNS, names_to = "Neuro Status", values_to = "RR") %>%
    mutate(`Neuro Status` = if_else(`Neuro Status` == "rr_CNS", "CNS", "PNS"))
  
  rr_results_tidy_l_ci <- rr_results %>%
    select(Comorbidity, l_ci_cns, l_ci_pns) %>%
    pivot_longer(cols = l_ci_cns:l_ci_pns, names_to = "Neuro Status", values_to = "l_CI") %>%
    mutate(`Neuro Status` = if_else(`Neuro Status` == "l_ci_cns", "CNS", "PNS"))
  
  rr_results_tidy_u_ci <- rr_results %>%
    select(Comorbidity, u_ci_cns, u_ci_pns) %>%
    pivot_longer(cols = u_ci_cns:u_ci_pns, names_to = "Neuro Status", values_to = "u_CI") %>%
    mutate(`Neuro Status` = if_else(`Neuro Status` == "u_ci_cns", "CNS", "PNS"))
  
  # combine all tidy results
  rr_results_tidy <- rr_results_tidy_rr %>%
    left_join(., rr_results_tidy_l_ci,
                               by = c("Comorbidity", "Neuro Status")) %>%
    left_join(., rr_results_tidy_u_ci,
                               by = c("Comorbidity", "Neuro Status"))
  
  # order by CNS diagnoses with higher RR
  cns_top_results <- rr_results_tidy %>%
    filter(`Neuro Status` == "CNS") %>%
    arrange(RR) %>%
    select(Comorbidity)
  
  rr_results_tidy$Comorbidity <-  ordered(rr_results_tidy$Comorbidity, levels = cns_top_results$Comorbidity)
  
  cns_rr_plot <- ggplot(rr_results_tidy %>%
           filter(`Neuro Status` == "CNS"),
         aes(x = RR, y = Comorbidity)) +
    geom_vline(aes(xintercept = 1), size = .25, linetype = "dashed") +
      geom_errorbarh(aes(xmax = u_CI, xmin = l_CI), size = 0.7, height = 0.2, color = "gray50") +
      geom_point(size = 1.5, color = "tomato") +
      scale_x_continuous(breaks = seq(0, 4, 1), labels = seq(0, 4, 1),
                         limits = c(0,4)) +
    ylab("") +
    xlab("Relative Risk") +
    ggtitle("CNS Patients") + 
    theme(axis.text.y = element_text(face = "bold"))
  
  # arrange by top PNS results
  pns_top_results <- rr_results_tidy %>%
    filter(`Neuro Status` == "PNS") %>%
    arrange(RR) %>%
    select(Comorbidity)
  
  rr_results_tidy$Comorbidity <-  ordered(rr_results_tidy$Comorbidity, levels = pns_top_results$Comorbidity)
  
  
  pns_rr_plot <- ggplot(rr_results_tidy %>%
           filter(`Neuro Status` == "PNS"),
         aes(x = RR, y = Comorbidity)) +
    geom_vline(aes(xintercept = 1), size = .25, linetype = "dashed") +
      geom_errorbarh(aes(xmax = u_CI, xmin = l_CI), size = 0.70, height = 0.2, color = "gray50") +
      geom_point(size = 1.5, color = "slateblue") +
      scale_x_continuous(breaks = seq(0, 1.5, 0.5), labels = seq(0, 1.5, 0.5),
                         limits = c(0,1.5)) +
    ylab("") +
    xlab("Relative Risk") +
    ggtitle("PNS Patients") + 
    theme(axis.text.y = element_text(face = "bold"))
  
  pns_rr_plot <- set_panel_size(pns_rr_plot,
                            width  = unit(7, "cm"),
                            height = unit(6.5, "in"))
  
  cns_rr_plot <- set_panel_size(cns_rr_plot,
                            width  = unit(7, "cm"),
                            height = unit(6.5, "in"))
  
  #grid.arrange(cns_rr_plot, pns_rr_plot, ncol=2)
  
  ggsave(paste0("figures/Figure4.relative_risk_", pop, ".png"), grid.arrange(cns_rr_plot, pns_rr_plot, ncol=2), width = 15, height = 8, dpi = 300)
  
  }
```

```{r eval=TRUE, include=FALSE}
rr_adult <- calculate_rr(is_pediatric = FALSE)
#rr_pediatric <- calculate_rr(is_pediatric = TRUE)
```

![](figures/Figure4.relative_risk_Adult.png){#id .class width=1000 height=800px}


### LPCA deviance explained

How much deviance is explained with 10 principal components for 30 comorbidity types?

Across all healthcare systems, 10 principal components explained above 75% of the deviance.

```{r}
compare_deviance <- function(sorted_sites, is_pediatric=FALSE) {
  
  pca_dev <- list()
  
  if(is_pediatric==FALSE) {
    population = "adults"
    sorted_sites = sorted_sites[!sorted_sites%in% c("GOSH_results", "BCH_results")]
    } else {
    population = "pediatrics"
    }

  # for each healthcare system, we will calculate the total number of patients with each ICD code
  for (i in sorted_sites) {
    print(i)
    tmp <- get(i)
    tmp_pca <- tmp[[c(
      "comorbidities",
      paste0("comorb_", population),
      "deviance_expl"
    )]] %>% 
      data.frame() %>% 
      rename(dev_expl = '.')
    tmp_pca$site <- tmp[["site"]]
    pca_dev[[i]] <- tmp_pca
    
  }
  
  pca_df <- bind_rows(pca_dev)
  
  return(pca_df)
  
}

pca_adult <- compare_deviance(sorted_sites = adult_sites, is_pediatric = FALSE)


pca_data <- bind_rows(pca_adult)


theme_set(theme_classic())

pca_plot <- ggplot(pca_data, 
       aes(x = fct_reorder(site, -dev_expl), y = dev_expl, group = 1)) +
  geom_point(size = 2.5) +
  geom_line() +
  xlab("Healthcare System") + 
  ylab("Proportion of Deviance explained") +  
  ylim(0,1) + 
  theme(axis.text.x=element_text(angle=90, hjust=1, face = "bold", size = 12),
        axis.text.y=element_text(face = "bold"), 
        axis.title.y = element_text(face = "bold", size = 15)); pca_plot

ggsave("figures/SuppFig_pca_deviance.png", pca_plot, height = 6, width = 6)
```
